Description of problem: Created a 3x2 distributed-replicate volume with the following options. Volume Name: vol Type: Distributed-Replicate Volume ID: d02487da-ef7b-47c3-8c86-5e435f76211f Status: Started Number of Bricks: 3 x 2 = 6 Transport-type: tcp Bricks: Brick1: dagobah:/data/export1 Brick2: dagobah:/data/export6 Brick3: dagobah:/data/export5 Brick4: dagobah:/data/export4 Brick5: dagobah:/data/export3 Brick6: dagobah:/data/export2 Options Reconfigured: performance.stat-prefetch: on features.limit-usage: /:7GB features.quota: on performance.read-ahead: on On a FUSE client, ran: for i in `seq 0 6`; do /opt/qa/tools/fs_mark -d . -D 4 -t 4 -N 500 -S $i; done Removed the /data/export1 directory. The FUSE client crashed with the following backtrace: Core was generated by `/usr/local/sbin/glusterfs --volfile-id=vol --volfile-server=dagobah mount/'. Program terminated with signal 6, Aborted. #0 0x00007ff7fae6d3a5 in __GI_raise (sig=6) at ../nptl/sysdeps/unix/sysv/linux/raise.c:64 64 ../nptl/sysdeps/unix/sysv/linux/raise.c: Transport endpoint is not connected. 
in ../nptl/sysdeps/unix/sysv/linux/raise.c (gdb) bt #0 0x00007ff7fae6d3a5 in __GI_raise (sig=6) at ../nptl/sysdeps/unix/sysv/linux/raise.c:64 #1 0x00007ff7fae70b0b in __GI_abort () at abort.c:92 #2 0x00007ff7fae65d4d in __GI___assert_fail (assertion=0x7ff7f74ef9e1 "0", file=<optimized out>, line=4618, function=<optimized out>) at assert.c:81 #3 0x00007ff7f74e1f37 in client3_1_inodelk (frame=0x7ff7fa0885ec, this=0x134e8c0, data=0x7fff5bcbeac0) at ../../../../../xlators/protocol/client/src/client3_1-fops.c:4616 #4 0x00007ff7f74c7374 in client_inodelk (frame=0x7ff7fa0885ec, this=0x134e8c0, volume=0x13556d0 "vol-replicate-0", loc=0x7ff7f40cf2fc, cmd=6, lock=0x7fff5bcbec70) at ../../../../../xlators/protocol/client/src/client.c:1592 #5 0x00007ff7f7286a6c in afr_nonblocking_inodelk (frame=0x7ff7f9e82148, this=0x1355990) at ../../../../../xlators/cluster/afr/src/afr-lk-common.c:1515 #6 0x00007ff7f7272448 in afr_sh_metadata_lock (frame=0x7ff7f9e82148, this=0x1355990) at ../../../../../xlators/cluster/afr/src/afr-self-heal-metadata.c:584 #7 0x00007ff7f72724ae in afr_self_heal_metadata (frame=0x7ff7f9e82148, this=0x1355990) at ../../../../../xlators/cluster/afr/src/afr-self-heal-metadata.c:600 #8 0x00007ff7f726b13c in afr_sh_missing_entries_done (frame=0x7ff7f9e82148, this=0x1355990) at ../../../../../xlators/cluster/afr/src/afr-self-heal-common.c:924 #9 0x00007ff7f7281bfc in afr_unlock_common_cbk (frame=0x7ff7f9e82148, cookie=0x1, this=0x1355990, op_ret=0, op_errno=0) at ../../../../../xlators/cluster/afr/src/afr-lk-common.c:544 #10 0x00007ff7f7282975 in afr_unlock_entrylk_cbk (frame=0x7ff7f9e82148, cookie=0x1, this=0x1355990, op_ret=0, op_errno=0) at ../../../../../xlators/cluster/afr/src/afr-lk-common.c:706 #11 0x00007ff7f74d29bc in client3_1_entrylk_cbk (req=0x7ff7efa88908, iov=0x7ff7efa88948, count=1, myframe=0x7ff7fa088dfc) at ../../../../../xlators/protocol/client/src/client3_1-fops.c:1307 #12 0x00007ff7fb620085 in rpc_clnt_handle_reply (clnt=0x7ff7f00ad710, 
pollin=0x7ff7f01fae10) at ../../../../rpc/rpc-lib/src/rpc-clnt.c:797 #13 0x00007ff7fb6203fc in rpc_clnt_notify (trans=0x7ff7f00bd240, mydata=0x7ff7f00ad740, event=RPC_TRANSPORT_MSG_RECEIVED, data=0x7ff7f01fae10) at ../../../../rpc/rpc-lib/src/rpc-clnt.c:916 #14 0x00007ff7fb61c1c8 in rpc_transport_notify (this=0x7ff7f00bd240, event=RPC_TRANSPORT_MSG_RECEIVED, data=0x7ff7f01fae10) at ../../../../rpc/rpc-lib/src/rpc-transport.c:498 #15 0x00007ff7f8110317 in socket_event_poll_in (this=0x7ff7f00bd240) at ../../../../../rpc/rpc-transport/socket/src/socket.c:1686 #16 0x00007ff7f8110880 in socket_event_handler (fd=9, idx=2, data=0x7ff7f00bd240, poll_in=1, poll_out=0, poll_err=0) at ../../../../../rpc/rpc-transport/socket/src/socket.c:1801 #17 0x00007ff7fb87669c in event_dispatch_epoll_handler (event_pool=0x133a380, events=0x1348750, i=0) at ../../../libglusterfs/src/event.c:794 #18 0x00007ff7fb8768af in event_dispatch_epoll (event_pool=0x133a380) at ../../../libglusterfs/src/event.c:856 #19 0x00007ff7fb876c22 in event_dispatch (event_pool=0x133a380) at ../../../libglusterfs/src/event.c:956 #20 0x0000000000408076 in main (argc=4, argv=0x7fff5bcbfb78) at ../../../glusterfsd/src/glusterfsd.c:1612 (gdb) f 3 #3 0x00007ff7f74e1f37 in client3_1_inodelk (frame=0x7ff7fa0885ec, this=0x134e8c0, data=0x7fff5bcbeac0) at ../../../../../xlators/protocol/client/src/client3_1-fops.c:4616 4616 GF_ASSERT_AND_GOTO_WITH_ERROR (this->name, (gdb) p this $1 = (xlator_t *) 0x134e8c0 (gdb) p *this $2 = {name = 0x134f280 "vol-client-0", type = 0x134f370 "protocol/client", next = 0x0, prev = 0x1350170, parents = 0x1356b70, children = 0x0, options = 0x134f2c0, dlhandle = 0x134f440, fops = 0x7ff7f76f6520, cbks = 0x7ff7f76f6500, dumpops = 0x7ff7f76f67c0, volume_options = {next = 0x134fa80, prev = 0x7ff7f00e27c0}, fini = 0x7ff7f74ca5b5 <fini>, init = 0x7ff7f74ca303 <init>, reconfigure = 0x7ff7f74ca130 <reconfigure>, mem_acct_init = 0x7ff7f74c9a70 <mem_acct_init>, notify = 0x7ff7f74c965d <notify>, 
loglevel = GF_LOG_NONE, latencies = {{min = 0, max = 0, total = 0, std = 0, mean = 0, count = 0} <repeats 46 times>}, history = 0x0, ctx = 0x1322010, graph = 0x134a580, itable = 0x0, init_succeeded = 1 '\001', private = 0x7ff7f00be570, mem_acct = {num_types = 96, rec = 0x7ff7f00bd940}, winds = 0, switched = 0 '\000', local_pool = 0x7ff7f00be670} (gdb) p *this->options $3 = {is_static = 0 '\000', hash_size = 1, count = 11, refcount = 0, members = 0x134f330, members_list = 0x7ff7f00e3ca0, extra_free = 0x0, extra_stdfree = 0x0, lock = 1} (gdb) f 4 #4 0x00007ff7f74c7374 in client_inodelk (frame=0x7ff7fa0885ec, this=0x134e8c0, volume=0x13556d0 "vol-replicate-0", loc=0x7ff7f40cf2fc, cmd=6, lock=0x7fff5bcbec70) at ../../../../../xlators/protocol/client/src/client.c:1592 warning: Source file is more recent than executable. 1592 ret = proc->fn (frame, this, &args); (gdb) p ret $5 = -1 (gdb) f 3 #3 0x00007ff7f74e1f37 in client3_1_inodelk (frame=0x7ff7fa0885ec, this=0x134e8c0, data=0x7fff5bcbeac0) at ../../../../../xlators/protocol/client/src/client3_1-fops.c:4616 4616 GF_ASSERT_AND_GOTO_WITH_ERROR (this->name, (gdb) p proc $7 = (rpc_clnt_procedure_t *) 0x7ff7f76f87f0 (gdb) p *proc $8 = {procname = 0x7ff7f74efb46 "INODELK", fn = 0x7ff7f74e1dd6 <client3_1_inodelk>} (gdb) p *proc->fn $9 = {int (call_frame_t *, xlator_t *, void *)} 0x7ff7f74e1dd6 <client3_1_inodelk> (gdb) p *(*proc->fn) $10 = {int (call_frame_t *, xlator_t *, void *)} 0x7ff7f74e1dd6 <client3_1_inodelk> (gdb) p *(*proc->)fn A syntax error in expression, near `)fn'. 
(gdb) p *(*proc->fn) $11 = {int (call_frame_t *, xlator_t *, void *)} 0x7ff7f74e1dd6 <client3_1_inodelk> (gdb) p &args $12 = (clnt_args_t *) 0x7fff5bcbeac0 (gdb) p args $13 = {loc = 0x7ff7f40cf2fc, fd = 0x0, xattr_req = 0x0, linkname = 0x0, iobref = 0x0, vector = 0x0, xattr = 0x0, stbuf = 0x0, dict = 0x0, oldloc = 0x0, newloc = 0x0, name = 0x0, flock = 0x7fff5bcbec70, volume = 0x13556d0 "vol-replicate-0", basename = 0x0, offset = 0, mask = 0, cmd = 6, size = 0, mode = 0, rdev = 0, flags = 0, wbflags = 0, count = 0, datasync = 0, cmd_entrylk = ENTRYLK_LOCK, type = ENTRYLK_RDLCK, optype = GF_XATTROP_ADD_ARRAY, valid = 0, len = 0} (gdb) p args.loc $14 = (loc_t *) 0x7ff7f40cf2fc (gdb) p *(args.loc) $15 = {path = 0x7ff7f02132a0 "/00/4f619625~~~~~~~~ID9P9WDGIH51036O2SXU43K5", name = 0x7ff7f02132a4 "4f619625~~~~~~~~ID9P9WDGIH51036O2SXU43K5", inode = 0x7ff7ef4ed1e0, parent = 0x7ff7ef51489c, gfid = '\000' <repeats 15 times>, pargfid = "W\336,\277\336\063I\251\243\266\362?\353ֆ\023"} client log: [2012-03-15 12:47:30.669314] I [afr-common.c:1320:afr_launch_self_heal] 0-vol-replicate-0: background meta-data data entry missing-entry gfid self-heal triggered. path: /, reason: lookup detected pending operations [2012-03-15 12:47:30.671452] E [afr-self-heal-common.c:1007:afr_sh_common_lookup_resp_handler] 0-vol-replicate-0: path / on subvolume vol-client-0 => -1 (No such file or directory) [2012-03-15 12:47:30.675457] I [afr-common.c:1195:afr_detect_self_heal_by_lookup_status] 0-vol-replicate-0: entries are missing in lookup of /. [2012-03-15 12:47:30.675499] I [afr-common.c:1320:afr_launch_self_heal] 0-vol-replicate-0: background meta-data data entry missing-entry gfid self-heal triggered. 
path: /, reason: lookup detected pending operations [2012-03-15 12:47:30.677183] E [afr-self-heal-common.c:1007:afr_sh_common_lookup_resp_handler] 0-vol-replicate-0: path / on subvolume vol-client-0 => -1 (No such file or directory) [2012-03-15 12:47:30.677267] I [afr-self-heal-entry.c:2332:afr_sh_entry_fix] 0-vol-replicate-0: /: Performing conservative merge [2012-03-15 12:47:30.677538] E [afr-self-heal-common.c:1007:afr_sh_common_lookup_resp_handler] 0-vol-replicate-0: path / on subvolume vol-client-0 => -1 (No such file or directory) [2012-03-15 12:47:30.678099] W [client3_1-fops.c:2100:client3_1_opendir_cbk] 0-vol-client-0: remote operation failed: No such file or directory. Path: / [2012-03-15 12:47:30.678119] E [afr-self-heal-entry.c:2150:afr_sh_entry_opendir_cbk] 0-vol-replicate-0: opendir of / failed on child vol-client-0 (No such file or directory) [2012-03-15 12:47:30.678461] E [afr-self-heal-common.c:2034:afr_self_heal_completion_cbk] 0-vol-replicate-0: background meta-data data entry self-heal failed on / [2012-03-15 12:47:30.679928] I [afr-common.c:1195:afr_detect_self_heal_by_lookup_status] 0-vol-replicate-0: entries are missing in lookup of /00. [2012-03-15 12:47:30.679954] I [afr-common.c:1320:afr_launch_self_heal] 0-vol-replicate-0: background meta-data data entry missing-entry gfid self-heal triggered. path: /00, reason: lookup detected pending operations [2012-03-15 12:47:30.680112] E [afr-self-heal-common.c:1007:afr_sh_common_lookup_resp_handler] 0-vol-replicate-0: path / on subvolume vol-client-0 => -1 (No such file or directory) [2012-03-15 12:47:30.680136] I [afr-self-heal-entry.c:2332:afr_sh_entry_fix] 0-vol-replicate-0: /: Performing conservative merge [2012-03-15 12:47:30.680386] I [afr-self-heal-common.c:1821:afr_sh_post_nb_entrylk_conflicting_sh_cbk] 0-vol-replicate-0: Non blocking entrylks failed. 
[2012-03-15 12:47:30.680406] I [afr-self-heal-common.c:917:afr_sh_missing_entries_done] 0-vol-replicate-0: split brain found, aborting selfheal of /00 [2012-03-15 12:47:30.680422] E [afr-self-heal-common.c:2034:afr_self_heal_completion_cbk] 0-vol-replicate-0: background meta-data data entry missing-entry gfid self-heal failed on /00 [2012-03-15 12:47:30.680740] W [client3_1-fops.c:2100:client3_1_opendir_cbk] 0-vol-client-0: remote operation failed: No such file or directory. Path: / [2012-03-15 12:47:30.680757] E [afr-self-heal-entry.c:2150:afr_sh_entry_opendir_cbk] 0-vol-replicate-0: opendir of / failed on child vol-client-0 (No such file or directory) [2012-03-15 12:47:30.681046] E [afr-self-heal-common.c:2034:afr_self_heal_completion_cbk] 0-vol-replicate-0: background meta-data data entry self-heal failed on / [2012-03-15 12:47:30.797965] I [afr-common.c:1195:afr_detect_self_heal_by_lookup_status] 0-vol-replicate-0: entries are missing in lookup of /00/4f619625~~~~~~~~ID9P9WDGIH51036O2SXU43K5. [2012-03-15 12:47:30.798013] I [afr-common.c:1320:afr_launch_self_heal] 0-vol-replicate-0: background meta-data data entry missing-entry gfid self-heal triggered. 
path: /00/4f619625~~~~~~~~ID9P9WDGIH51036O2SXU43K5, reason: lookup detected pending operations [2012-03-15 12:47:30.799094] E [afr-self-heal-common.c:1007:afr_sh_common_lookup_resp_handler] 0-vol-replicate-0: path /00 on subvolume vol-client-0 => -1 (No such file or directory) [2012-03-15 12:47:30.799238] I [afr-self-heal-common.c:1681:afr_sh_find_fresh_parents] 0-vol-replicate-0: Parent dir missing for /00/4f619625~~~~~~~~ID9P9WDGIH51036O2SXU43K5, in missing entry self-heal, aborting missing-entry self-heal pending frames: frame : type(1) op(CREATE) frame : type(1) op(CREATE) frame : type(1) op(FLUSH) frame : type(1) op(FLUSH) frame : type(1) op(WRITE) frame : type(1) op(LOOKUP) frame : type(1) op(LOOKUP) frame : type(1) op(FLUSH) frame : type(1) op(WRITE) frame : type(1) op(WRITE) frame : type(1) op(WRITE) patchset: git://git.gluster.com/glusterfs.git signal received: 6 time of crash: 2012-03-15 12:47:30 configuration details: argp 1 backtrace 1 dlfcn 1 fdatasync 1 libpthread 1 llistxattr 1 setfsid 1 spinlock 1 epoll.h 1 xattr.h 1 st_atim.tv_nsec 1 package-string: glusterfs 3git /lib/x86_64-linux-gnu/libc.so.6(+0x36420)[0x7ff7fae6d420] /lib/x86_64-linux-gnu/libc.so.6(gsignal+0x35)[0x7ff7fae6d3a5] /lib/x86_64-linux-gnu/libc.so.6(abort+0x17b)[0x7ff7fae70b0b] /lib/x86_64-linux-gnu/libc.so.6(__assert_fail+0xdd)[0x7ff7fae65d4d] /usr/local/lib/glusterfs/3git/xlator/protocol/client.so(client3_1_inodelk+0x161)[0x7ff7f74e1f37] /usr/local/lib/glusterfs/3git/xlator/protocol/client.so(client_inodelk+0x182)[0x7ff7f74c7374] /usr/local/lib/glusterfs/3git/xlator/cluster/replicate.so(afr_nonblocking_inodelk+0xc79)[0x7ff7f7286a6c] /usr/local/lib/glusterfs/3git/xlator/cluster/replicate.so(afr_sh_metadata_lock+0xb6)[0x7ff7f7272448] /usr/local/lib/glusterfs/3git/xlator/cluster/replicate.so(afr_self_heal_metadata+0x5f)[0x7ff7f72724ae] /usr/local/lib/glusterfs/3git/xlator/cluster/replicate.so(afr_sh_missing_entries_done+0x160)[0x7ff7f726b13c] 
/usr/local/lib/glusterfs/3git/xlator/cluster/replicate.so(+0x54bfc)[0x7ff7f7281bfc] /usr/local/lib/glusterfs/3git/xlator/cluster/replicate.so(+0x55975)[0x7ff7f7282975] /usr/local/lib/glusterfs/3git/xlator/protocol/client.so(client3_1_entrylk_cbk+0x2ba)[0x7ff7f74d29bc] /usr/local/lib/libgfrpc.so.0(rpc_clnt_handle_reply+0x20e)[0x7ff7fb620085] /usr/local/lib/libgfrpc.so.0(rpc_clnt_notify+0x2b4)[0x7ff7fb6203fc] /usr/local/lib/libgfrpc.so.0(rpc_transport_notify+0x115)[0x7ff7fb61c1c8] /usr/local/lib/glusterfs/3git/rpc-transport/socket.so(socket_event_poll_in+0x54)[0x7ff7f8110317] /usr/local/lib/glusterfs/3git/rpc-transport/socket.so(socket_event_handler+0x21d)[0x7ff7f8110880] /usr/local/lib/libglusterfs.so.0(+0x4d69c)[0x7ff7fb87669c] /usr/local/lib/libglusterfs.so.0(+0x4d8af)[0x7ff7fb8768af] /usr/local/lib/libglusterfs.so.0(event_dispatch+0x88)[0x7ff7fb876c22] /usr/local/sbin/glusterfs(main+0x238)[0x408076] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xed)[0x7ff7fae5830d] /usr/local/sbin/glusterfs[0x403f69] ---------
Same backtrace as the other bug. *** This bug has been marked as a duplicate of bug 800291 ***