+++ This bug was initially created as a clone of Bug #1262345 +++ +++ This bug was initially created as a clone of Bug #1262291 +++ Description of problem: ----------------------- Trying to fetch the value of `replica.split-brain-status' attribute of a file in a volume causes the mount to hang. The following is using gdb on the mount process - Thread 5 (Thread 0x7facd0a94700 (LWP 24886)): #0 0x00007facdc8a463c in pthread_cond_wait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0 #1 0x00007facdd7fe02b in __syncbarrier_wait (barrier=0x7facc94991c0, waitfor=2) at syncop.c:1130 #2 syncbarrier_wait (barrier=0x7facc94991c0, waitfor=2) at syncop.c:1147 #3 0x00007faccbb8a98d in afr_selfheal_unlocked_discover_on (frame=<value optimized out>, inode=<value optimized out>, gfid=<value optimized out>, replies=0x7facd0a91590, discover_on=0x7faccc05ffb0 "\001\001\r", <incomplete sequence \360\255\272>) at afr-self-heal-common.c:754 #4 0x00007faccbb97b47 in afr_is_split_brain (frame=0x7facdb371474, this=0x7faccc008c90, inode=0x7facc9b18108, gfid=0x7facbc000ca0 "\370f\262,\031", d_spb=<value optimized out>, m_spb=0x7facd0a91e40) at afr-common.c:4850 #5 0x00007faccbb9a9f1 in afr_get_split_brain_status (frame=0x7facdb371474, this=0x7faccc008c90, loc=0x7facbc000c80) at afr-common.c:4900 #6 0x00007faccbb72eb0 in afr_getxattr (frame=0x7facdb371474, this=0x7faccc008c90, loc=0x7facbc000c80, name=<value optimized out>, xdata=0x0) at afr-inode-read.c:1492 #7 0x00007faccb92bf8c in dht_getxattr (frame=0x7facdb37187c, this=<value optimized out>, loc=0x7facbc000c80, key=<value optimized out>, xdata=0x0) at dht-common.c:3264 #8 0x00007facdd7bef3b in default_getxattr (frame=0x7facdb37187c, this=0x7faccc00b4d0, loc=0x7facbc000c80, name=0x7facbc0103a0 "replica.split-brain-status", xdata=<value optimized out>) at defaults.c:1970 #9 0x00007facdd7bef3b in default_getxattr (frame=0x7facdb37187c, this=0x7faccc00c920, loc=0x7facbc000c80, name=0x7facbc0103a0 "replica.split-brain-status", xdata=<value optimized 
out>) at defaults.c:1970 #10 0x00007facdd7bef3b in default_getxattr (frame=0x7facdb37187c, this=0x7faccc00dc80, loc=0x7facbc000c80, name=0x7facbc0103a0 "replica.split-brain-status", xdata=<value optimized out>) at defaults.c:1970 #11 0x00007facdd7bef3b in default_getxattr (frame=0x7facdb37187c, this=0x7faccc00f0d0, loc=0x7facbc000c80, name=0x7facbc0103a0 "replica.split-brain-status", xdata=<value optimized out>) at defaults.c:1970 #12 0x00007facdd7bef3b in default_getxattr (frame=0x7facdb37187c, this=0x7faccc010430, loc=0x7facbc000c80, name=0x7facbc0103a0 "replica.split-brain-status", xdata=<value optimized out>) at defaults.c:1970 #13 0x00007facdd7bef3b in default_getxattr (frame=0x7facdb37187c, this=0x7faccc0117e0, loc=0x7facbc000c80, name=0x7facbc0103a0 "replica.split-brain-status", xdata=<value optimized out>) at defaults.c:1970 #14 0x00007faccaa9ef6d in svc_getxattr (frame=0x7facdb37187c, this=<value optimized out>, loc=0x7facbc000c80, name=0x7facbc0103a0 "replica.split-brain-status", xdata=0x0) at snapview-client.c:888 #15 0x00007facca883715 in io_stats_getxattr (frame=0x7facdb371d30, this=0x7faccc015150, loc=0x7facbc000c80, name=0x7facbc0103a0 "replica.split-brain-status", xdata=0x0) at io-stats.c:2289 #16 0x00007facdd7bef3b in default_getxattr (frame=0x7facdb371d30, this=0x7faccc016690, loc=0x7facbc000c80, name=0x7facbc0103a0 "replica.split-brain-status", xdata=<value optimized out>) at defaults.c:1970 #17 0x00007facd4adf5c8 in fuse_getxattr_resume (state=0x7facbc000c60) at fuse-bridge.c:3425 #18 0x00007facd4ad35f5 in fuse_resolve_done (state=<value optimized out>) at fuse-resolve.c:644 #19 fuse_resolve_all (state=<value optimized out>) at fuse-resolve.c:671 #20 0x00007facd4ad3326 in fuse_resolve (state=0x7facbc000c60) at fuse-resolve.c:635 #21 0x00007facd4ad363e in fuse_resolve_all (state=<value optimized out>) at fuse-resolve.c:667 #22 0x00007facd4ad36a3 in fuse_resolve_continue (state=<value optimized out>) at fuse-resolve.c:687 #23 0x00007facd4ad38a1 in 
fuse_resolve_gfid_cbk (frame=<value optimized out>, cookie=<value optimized out>, this=0x7facdf78a690, op_ret=0, op_errno=<value optimized out>, inode=<value optimized out>, buf=0x7facc9b688f8, xattr=0x7facdad6ac74, postparent=0x7facc9b68b28) at fuse-resolve.c:169 #24 0x00007facca8904d3 in io_stats_lookup_cbk (frame=0x7facdb371d30, cookie=<value optimized out>, this=<value optimized out>, op_ret=0, op_errno=117, inode=0x7facc8d091a4, buf=0x7facc9b688f8, xdata=0x7facdad6ac74, postparent=0x7facc9b68b28) at io-stats.c:1512 #25 0x00007faccaaa6138 in svc_lookup_cbk (frame=0x7facdb37187c, cookie=<value optimized out>, this=<value optimized out>, op_ret=0, op_errno=117, inode=0x7facc8d091a4, buf=0x7facc9b688f8, ---Type <return> to continue, or q <return> to quit--- xdata=0x7facdad6ac74, postparent=0x7facc9b68b28) at snapview-client.c:371 #26 0x00007faccaeb8554 in qr_lookup_cbk (frame=0x7facdb371474, cookie=<value optimized out>, this=<value optimized out>, op_ret=0, op_errno=117, inode_ret=0x7facc8d091a4, buf=0x7facc9b688f8, xdata=0x7facdad6ac74, postparent=0x7facc9b68b28) at quick-read.c:446 #27 0x00007faccb0c4d45 in ioc_lookup_cbk (frame=0x7facdb3711c4, cookie=<value optimized out>, this=<value optimized out>, op_ret=0, op_errno=117, inode=0x7facc8d091a4, stbuf=0x7facc9b688f8, xdata=0x7facdad6ac74, postparent=0x7facc9b68b28) at io-cache.c:260 #28 0x00007faccb927d40 in dht_discover_complete (this=<value optimized out>, discover_frame=<value optimized out>) at dht-common.c:306 #29 0x00007faccb9323bc in dht_discover_cbk (frame=0x7facdb1a56d4, cookie=0x7facdb37131c, this=0x7faccc00a0f0, op_ret=<value optimized out>, op_errno=117, inode=0x7facc8d091a4, stbuf=0x7facbc011198, xattr=0x7facdad6ac74, postparent=0x7facbc011208) at dht-common.c:441 #30 0x00007faccbba2fd5 in afr_discover_done (frame=<value optimized out>, cookie=<value optimized out>, this=<value optimized out>, op_ret=<value optimized out>, op_errno=<value optimized out>, inode=<value optimized out>, 
buf=0x7facd0a93ab0, xdata=0x7facdad6ac74, postparent=0x7facd0a93a40) at afr-common.c:2115 #31 afr_discover_cbk (frame=<value optimized out>, cookie=<value optimized out>, this=<value optimized out>, op_ret=<value optimized out>, op_errno=<value optimized out>, inode=<value optimized out>, buf=0x7facd0a93ab0, xdata=0x7facdad6ac74, postparent=0x7facd0a93a40) at afr-common.c:2150 #32 0x00007faccbde00f7 in client3_3_lookup_cbk (req=<value optimized out>, iov=<value optimized out>, count=<value optimized out>, myframe=0x7facdb371e88) at client-rpc-fops.c:2978 #33 0x00007facdd584455 in rpc_clnt_handle_reply (clnt=0x7faccc0bdc60, pollin=0x7faccc0cf430) at rpc-clnt.c:766 #34 0x00007facdd585981 in rpc_clnt_notify (trans=<value optimized out>, mydata=0x7faccc0bdc90, event=RPC_TRANSPORT_MSG_RECEIVED, data=0x7faccc0cf430) at rpc-clnt.c:907 #35 0x00007facdd580ad8 in rpc_transport_notify (this=<value optimized out>, event=<value optimized out>, data=<value optimized out>) at rpc-transport.c:543 #36 0x00007facd20bb265 in socket_event_poll_in (this=0x7faccc0cd7f0) at socket.c:2290 #37 0x00007facd20bce3d in socket_event_handler (fd=<value optimized out>, idx=<value optimized out>, data=0x7faccc0cd7f0, poll_in=1, poll_out=0, poll_err=0) at socket.c:2403 #38 0x00007facdd81ab20 in event_dispatch_epoll_handler (data=0x7facdf7c37a0) at event-epoll.c:575 #39 event_dispatch_epoll_worker (data=0x7facdf7c37a0) at event-epoll.c:678 #40 0x00007facdc8a0a51 in start_thread () from /lib64/libpthread.so.0 #41 0x00007facdc20a9ad in clone () from /lib64/libc.so.6 Version-Release number of selected component (if applicable): ------------------------------------------------------------- glusterfs-3.7.1-14.el6.x86_64 How reproducible: ----------------- Always Steps to Reproduce: ------------------- 1. Create a replicate volume and mount it on a client via fuse. 2. Run the following command to get the value of attribute `replica.split-brain-status' of a file. 
Actual results: --------------- The mount was found to hang. Expected results: ----------------- The mount should not hang. --- Additional comment from Vijay Bellur on 2015-09-11 09:18:15 EDT --- REVIEW: http://review.gluster.org/12163 (afr : get split-brain-status in a synctask) posted (#1) for review on master by Anuradha Talur (atalur) --- Additional comment from Vijay Bellur on 2015-09-11 09:36:29 EDT --- REVIEW: http://review.gluster.org/12163 (afr : get split-brain-status in a synctask) posted (#2) for review on master by Anuradha Talur (atalur) --- Additional comment from Vijay Bellur on 2015-09-12 00:45:43 EDT --- REVIEW: http://review.gluster.org/12163 (afr : get split-brain-status in a synctask) posted (#3) for review on master by Anuradha Talur (atalur) --- Additional comment from Vijay Bellur on 2015-09-12 04:16:37 EDT --- REVIEW: http://review.gluster.org/12163 (afr : get split-brain-status in a synctask) posted (#4) for review on master by Pranith Kumar Karampuri (pkarampu)
REVIEW: http://review.gluster.org/12166 (afr : get split-brain-status in a synctask) posted (#1) for review on release-3.7 by Anuradha Talur (atalur)
REVIEW: http://review.gluster.org/12166 (afr : get split-brain-status in a synctask) posted (#2) for review on release-3.7 by Anuradha Talur (atalur)
REVIEW: http://review.gluster.org/12166 (afr : get split-brain-status in a synctask) posted (#3) for review on release-3.7 by Ravishankar N (ravishankar)
COMMIT: http://review.gluster.org/12166 committed in release-3.7 by Pranith Kumar Karampuri (pkarampu) ------ commit 57dfa97d4f84d426969591d3c0c674bdd54de450 Author: Anuradha Talur <atalur> Date: Fri Sep 11 18:43:36 2015 +0530 afr : get split-brain-status in a synctask Backport of: http://review.gluster.org/#/c/12163/ On executing `getfattr -n replica.split-brain-status <file>` on mount, there is a possibility that the mount hangs. To avoid this hang, fetch the split-brain-status of a file in synctask. >Change-Id: I87b781419ffc63248f915325b845e3233143d385 >BUG: 1262345 >Signed-off-by: Anuradha Talur <atalur> Change-Id: I9f4f4b54e108d3a0017264353b8272e072170c16 BUG: 1262547 Signed-off-by: Anuradha Talur <atalur> Reviewed-on: http://review.gluster.org/12166 Tested-by: NetBSD Build System <jenkins.org> Tested-by: Gluster Build System <jenkins.com> Reviewed-by: Pranith Kumar Karampuri <pkarampu>
This bug is getting closed because a release has been made available that should address the reported issue. In case the problem is still not fixed with glusterfs-3.7.5, please open a new bug report. glusterfs-3.7.5 has been announced on the Gluster mailinglists [1], packages for several distributions should become available in the near future. Keep an eye on the Gluster Users mailinglist [2] and the update infrastructure for your distribution. [1] http://www.gluster.org/pipermail/gluster-users/2015-October/023968.html [2] http://thread.gmane.org/gmane.comp.file-systems.gluster.user
This bug is getting closed because a release has been made available that should address the reported issue. In case the problem is still not fixed with glusterfs-3.7.5, please open a new bug report. glusterfs-3.7.5 has been announced on the Gluster mailinglists [1], packages for several distributions should become available in the near future. Keep an eye on the Gluster Users mailinglist [2] and the update infrastructure for your distribution. [1] http://www.gluster.org/pipermail/gluster-users/2015-October/023968.html [2] http://thread.gmane.org/gmane.comp.file-systems.gluster.user