Bug 1452205
| Summary: | glusterd on a node crashed after running volume profile command | |||
|---|---|---|---|---|
| Product: | [Red Hat Storage] Red Hat Gluster Storage | Reporter: | Prasad Desala <tdesala> | |
| Component: | glusterd | Assignee: | Atin Mukherjee <amukherj> | |
| Status: | CLOSED ERRATA | QA Contact: | Vinayak Papnoi <vpapnoi> | |
| Severity: | high | Docs Contact: | ||
| Priority: | unspecified | |||
| Version: | rhgs-3.3 | CC: | amukherj, rhinduja, rhs-bugs, storage-qa-internal, vbellur | |
| Target Milestone: | --- | |||
| Target Release: | RHGS 3.3.0 | |||
| Hardware: | Unspecified | |||
| OS: | Unspecified | |||
| Whiteboard: | ||||
| Fixed In Version: | glusterfs-3.8.4-28 | Doc Type: | If docs needed, set a value | |
| Doc Text: | Story Points: | --- | ||
| Clone Of: | ||||
| : | 1452956 1454612 (view as bug list) | Environment: | ||
| Last Closed: | 2017-09-21 04:43:23 UTC | Type: | Bug | |
| Regression: | --- | Mount Type: | --- | |
| Documentation: | --- | CRM: | ||
| Verified Versions: | Category: | --- | ||
| oVirt Team: | --- | RHEL 7.3 requirements from Atomic Host: | ||
| Cloudforms Team: | --- | Target Upstream Version: | ||
| Embargoed: | ||||
| Bug Depends On: | 1452956 | |||
| Bug Blocks: | 1417151 | |||
upstream patch : https://review.gluster.org/17350
downstream patch : https://code.engineering.redhat.com/gerrit/#/c/106947/

Build: 3.8.4-26
Followed the same steps as in the bug description. After that, restarted glusterd and ran the same steps again. Glusterd crashed.
Here is the full backtrace of the core:

```
(gdb) bt full
#0 glusterd_op_ac_rcvd_brick_op_acc (event=<optimized out>, ctx=<optimized out>) at glusterd-op-sm.c:7570
ret = -1
ev_ctx = 0x0
op_errstr = 0x0
op = <optimized out>
type = <optimized out>
op_ctx = <optimized out>
req_ctx = <optimized out>
pending_entry = <optimized out>
this = 0x5634d1f86760
__FUNCTION__ = "glusterd_op_ac_rcvd_brick_op_acc"
#1 0x00007fc80c23902f in glusterd_op_sm () at glusterd-op-sm.c:8078
event = 0x7fc7f8008310
tmp = 0x7fc80c575570 <gd_op_sm_queue>
ret = <optimized out>
lock_err = 0
handler = <optimized out>
state = <optimized out>
event_type = GD_OP_EVENT_RCVD_ACC
this = 0x5634d1f86760
txn_op_info = {state = {state = GD_OP_STATE_BRICK_OP_SENT, time = {tv_sec = 0, tv_usec = 0}}, pending_count = 0, brick_pending_count = 1,
op_count = 0, op = 16, op_peers = {next = 0x0, prev = 0x0}, op_ctx = 0x7fc7fc0114a0, req = 0x7fc7f8009530, op_ret = 0, op_errno = 0,
op_errstr = 0x0, pending_bricks = {next = 0x7fc7f80146f0, prev = 0x7fc7f80146f0}, txn_generation = 4}
__FUNCTION__ = "glusterd_op_sm"
#2 0x00007fc80c269225 in __glusterd_stage_op_cbk (req=req@entry=0x7fc7f8006df0, iov=iov@entry=0x7fc7f8006e30, count=count@entry=1,
myframe=myframe@entry=0x7fc7f80147c0) at glusterd-rpc-ops.c:1279
rsp = {uuid = "4g\332\003[\372L]\276Y\234l^8?\\", op = 16, op_ret = 0, op_errno = 0, op_errstr = 0x7fc7f8020fe0 "", dict = {dict_len = 43,
dict_val = 0x7fc7f8011cf0 ""}}
ret = 0
op_ret = <optimized out>
event_type = GD_OP_EVENT_RCVD_ACC
peerinfo = <optimized out>
dict = 0x7fc7f8003370
err_str = '\000' <repeats 2047 times>
peer_str = <optimized out>
this = <optimized out>
priv = <optimized out>
txn_id = 0x7fc7f80148f0
frame = 0x7fc7f80147c0
__FUNCTION__ = "__glusterd_stage_op_cbk"
#3 0x00007fc80c26a3da in glusterd_big_locked_cbk (req=0x7fc7f8006df0, iov=0x7fc7f8006e30, count=1, myframe=0x7fc7f80147c0,
fn=0x7fc80c268d00 <__glusterd_stage_op_cbk>) at glusterd-rpc-ops.c:215
priv = 0x7fc817aa4050
ret = -1
#4 0x00007fc81753a840 in rpc_clnt_handle_reply (clnt=clnt@entry=0x5634d2012650, pollin=pollin@entry=0x7fc7f8005eb0) at rpc-clnt.c:794
conn = 0x5634d2012680
saved_frame = <optimized out>
ret = 0
req = 0x7fc7f8006df0
xid = 17
__FUNCTION__ = "rpc_clnt_handle_reply"
#5 0x00007fc81753ab27 in rpc_clnt_notify (trans=<optimized out>, mydata=0x5634d2012680, event=<optimized out>, data=0x7fc7f8005eb0)
at rpc-clnt.c:987
conn = 0x5634d2012680
clnt = 0x5634d2012650
ret = -1
req_info = 0x0
pollin = 0x7fc7f8005eb0
clnt_mydata = 0x0
old_THIS = 0x5634d1f86760
__FUNCTION__ = "rpc_clnt_notify"
#6 0x00007fc8175369e3 in rpc_transport_notify (this=this@entry=0x5634d2012850, event=event@entry=RPC_TRANSPORT_MSG_RECEIVED,
data=data@entry=0x7fc7f8005eb0) at rpc-transport.c:538
ret = -1
__FUNCTION__ = "rpc_transport_notify"
#7 0x00007fc80968a364 in socket_event_poll_in (this=this@entry=0x5634d2012850) at socket.c:2275
ret = <optimized out>
pollin = 0x7fc7f8005eb0
priv = <optimized out>
#8 0x00007fc80968c845 in socket_event_handler (fd=<optimized out>, idx=3, data=0x5634d2012850, poll_in=1, poll_out=0, poll_err=0) at socket.c:2411
this = 0x5634d2012850
priv = 0x5634d2012ef0
ret = 0
__FUNCTION__ = "socket_event_handler"
#9 0x00007fc8177cb450 in event_dispatch_epoll_handler (event=0x7fc8076dce80, event_pool=0x5634d1f75f50) at event-epoll.c:572
handler = 0x7fc80968c6a0 <socket_event_handler>
gen = 1
slot = 0x5634d1fb83d0
data = 0x5634d2012850
ret = -1
fd = 14
ev_data = 0x7fc8076dce84
idx = 3
#10 event_dispatch_epoll_worker (data=0x5634d1ffc1e0) at event-epoll.c:675
event = {events = 1, data = {ptr = 0x100000003, fd = 3, u32 = 3, u64 = 4294967299}}
ret = <optimized out>
ev_data = 0x5634d1ffc1e0
event_pool = 0x5634d1f75f50
myindex = 1
timetodie = 0
__FUNCTION__ = "event_dispatch_epoll_worker"
#11 0x00007fc8165d0e25 in start_thread () from /lib64/libpthread.so.0
No symbol table info available.
#12 0x00007fc815e9d34d in clone () from /lib64/libc.so.6
No symbol table info available.
```
Hence, marking it as Failed_QA.
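For readers following the backtrace: frame #0 shows the handler entered with ev_ctx = 0x0, which is consistent with the SIGSEGV being a NULL-pointer dereference inside glusterd_op_ac_rcvd_brick_op_acc. The sketch below is only a minimal, self-contained illustration of that general failure mode and of a defensive guard; the types and names in it are hypothetical stand-ins, not the glusterd source and not the actual fix (the patches linked in the comments carry the real change).

```c
/* Minimal sketch of the failure mode suggested by frame #0 (ev_ctx = 0x0).
 * All names here are hypothetical stand-ins, not glusterd code. */
#include <stdio.h>

typedef struct {
    int op_ret;
    int op_errno;
} brick_rsp_ctx_t;                  /* stand-in for a per-brick response context */

/* Handler that trusts ctx to be non-NULL: dereferencing a NULL context
 * raises SIGSEGV, which is what the core dump above captured. */
int handle_brick_op_acc_unsafe(void *ctx)
{
    brick_rsp_ctx_t *ev_ctx = ctx;
    return ev_ctx->op_ret;          /* crashes when ctx == NULL */
}

/* Defensive variant: validate the context and fail the event gracefully
 * so the op state machine can unwind with an error instead of dying. */
int handle_brick_op_acc_safe(void *ctx)
{
    brick_rsp_ctx_t *ev_ctx = ctx;
    if (!ev_ctx) {
        fprintf(stderr, "brick op ack received without a context; ignoring\n");
        return -1;
    }
    return ev_ctx->op_ret;
}

int main(void)
{
    /* Simulate the bad hand-off: a NULL ctx reaches the handler. */
    printf("safe handler returned %d\n", handle_brick_op_acc_safe(NULL));
    /* handle_brick_op_acc_unsafe(NULL) would segfault, as in the backtrace. */
    return 0;
}
```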
upstream patch : https://review.gluster.org/#/c/17478
downstream patch : https://code.engineering.redhat.com/gerrit/#/c/108396/

Build : 3.8.4-28

Followed the steps in the description. Glusterd is not crashing, but it shows a message as below:

```
[root@dhcp43-38 ~]# gluster vol profile bug2 info | gluster vol profile bug3 info
Another transaction could be in progress. Please try again after sometime.
```

This is expected. Hence, moving it to VERIFIED.

Since the problem described in this bug report should be resolved in a recent advisory, it has been closed with a resolution of ERRATA.

For information on the advisory, and where to find the updated files, follow the link below. If the solution does not work for you, open a new bug report.

https://access.redhat.com/errata/RHBA-2017:2774
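The "Another transaction could be in progress" response indicates that the second command is rejected rather than allowed to race the first command's transaction state. As a rough illustration of that fail-fast pattern only (hypothetical code, not glusterd's actual cluster-wide transaction locking), consider:

```c
/* Hypothetical fail-fast transaction guard; glusterd's real implementation
 * uses its own cluster-wide transaction/volume locks, not this mutex. */
#include <errno.h>
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t txn_lock = PTHREAD_MUTEX_INITIALIZER;

static int start_volume_txn(const char *volname)
{
    /* Try to become the active transaction without blocking. */
    if (pthread_mutex_trylock(&txn_lock) == EBUSY) {
        fprintf(stderr,
                "Another transaction could be in progress. "
                "Please try again after sometime.\n");
        return -1;
    }

    printf("running profile transaction for volume %s\n", volname);

    pthread_mutex_unlock(&txn_lock);
    return 0;
}

int main(void)
{
    /* If two of these requests raced, the loser would get the "try again"
     * message instead of stepping on the winner's transaction state. */
    start_volume_txn("bug2");
    start_volume_txn("bug3");
    return 0;
}
```

In this sketch the loser of the race simply reports the busy condition and asks the caller to retry, which matches the behaviour observed during verification.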
Description of problem:
=======================
glusterd on a node crashed after running volume profile command.

Version-Release number of selected component (if applicable):
3.8.4-24.el7rhgs

How reproducible:
1/1

Steps to Reproduce:
===================
1) Create two gluster volumes and start them.
2) Enable brick mux on the volumes.
3) Start volume profile on the two volumes in a single command as below; glusterd crashes on the node where the command is executed:
gluster v profile <vol1> info | gluster v profile <vol2> info

```
[2017-05-15 10:17:50.465730] I [MSGID: 106568] [glusterd-svc-mgmt.c:228:glusterd_svc_stop] 0-management: scrub service is stopped
pending frames:
frame : type(0) op(0)
frame : type(0) op(0)
frame : type(0) op(0)
frame : type(0) op(0)
frame : type(0) op(0)
frame : type(0) op(0)
patchset: git://git.gluster.com/glusterfs.git
signal received: 11
time of crash:
2017-05-15 10:18:19
configuration details:
argp 1
backtrace 1
dlfcn 1
libpthread 1
llistxattr 1
setfsid 1
spinlock 1
epoll.h 1
xattr.h 1
st_atim.tv_nsec 1
package-string: glusterfs 3.8.4
/lib64/libglusterfs.so.0(_gf_msg_backtrace_nomem+0xc2)[0x7f79eca6a0e2]
/lib64/libglusterfs.so.0(gf_print_trace+0x324)[0x7f79eca73b04]
/lib64/libc.so.6(+0x35250)[0x7f79eb14c250]
/usr/lib64/glusterfs/3.8.4/xlator/mgmt/glusterd.so(+0x42120)[0x7f79e15b0120]
/usr/lib64/glusterfs/3.8.4/xlator/mgmt/glusterd.so(+0x3e40f)[0x7f79e15ac40f]
/usr/lib64/glusterfs/3.8.4/xlator/mgmt/glusterd.so(+0x6dcb5)[0x7f79e15dbcb5]
/usr/lib64/glusterfs/3.8.4/xlator/mgmt/glusterd.so(+0x6ee6a)[0x7f79e15dce6a]
/lib64/libgfrpc.so.0(rpc_clnt_handle_reply+0x90)[0x7f79ec833840]
/lib64/libgfrpc.so.0(rpc_clnt_notify+0x1e7)[0x7f79ec833b27]
/lib64/libgfrpc.so.0(rpc_transport_notify+0x23)[0x7f79ec82f9e3]
/usr/lib64/glusterfs/3.8.4/rpc-transport/socket.so(+0x73b4)[0x7f79de9fc3b4]
/usr/lib64/glusterfs/3.8.4/rpc-transport/socket.so(+0x9895)[0x7f79de9fe895]
/lib64/libglusterfs.so.0(+0x83e00)[0x7f79ecac3e00]
/lib64/libpthread.so.0(+0x7dc5)[0x7f79eb8c9dc5]
/lib64/libc.so.6(clone+0x6d)[0x7f79eb20e73d]
---------
```

```
(gdb) bt
#0  glusterd_op_ac_rcvd_brick_op_acc (event=0x7f79d04449a0, ctx=0x0) at glusterd-op-sm.c:7544
#1  0x00007f79e15ac40f in glusterd_op_sm () at glusterd-op-sm.c:8091
#2  0x00007f79e15dbcb5 in __glusterd_stage_op_cbk (req=req@entry=0x7f79d006aaa0, iov=iov@entry=0x7f79d006aae0, count=count@entry=1, myframe=myframe@entry=0x7f79d0429580) at glusterd-rpc-ops.c:1279
#3  0x00007f79e15dce6a in glusterd_big_locked_cbk (req=0x7f79d006aaa0, iov=0x7f79d006aae0, count=1, myframe=0x7f79d0429580, fn=0x7f79e15db790 <__glusterd_stage_op_cbk>) at glusterd-rpc-ops.c:215
#4  0x00007f79ec833840 in rpc_clnt_handle_reply (clnt=clnt@entry=0x7f79ee704e00, pollin=pollin@entry=0x7f79d0427fe0) at rpc-clnt.c:794
#5  0x00007f79ec833b27 in rpc_clnt_notify (trans=<optimized out>, mydata=0x7f79ee704e30, event=<optimized out>, data=0x7f79d0427fe0) at rpc-clnt.c:987
#6  0x00007f79ec82f9e3 in rpc_transport_notify (this=this@entry=0x7f79ee705000, event=event@entry=RPC_TRANSPORT_MSG_RECEIVED, data=data@entry=0x7f79d0427fe0) at rpc-transport.c:538
#7  0x00007f79de9fc3b4 in socket_event_poll_in (this=this@entry=0x7f79ee705000) at socket.c:2275
#8  0x00007f79de9fe895 in socket_event_handler (fd=<optimized out>, idx=2, data=0x7f79ee705000, poll_in=1, poll_out=0, poll_err=0) at socket.c:2411
#9  0x00007f79ecac3e00 in event_dispatch_epoll_handler (event=0x7f79dca4fe80, event_pool=0x7f79ee5dd730) at event-epoll.c:572
#10 event_dispatch_epoll_worker (data=0x7f79ee6364d0) at event-epoll.c:675
#11 0x00007f79eb8c9dc5 in start_thread () from /lib64/libpthread.so.0
#12 0x00007f79eb20e73d in clone () from /lib64/libc.so.6
```

Actual results:
===============
Glusterd crashed.

Expected results:
=================
No crashes.