Bug 806873

Summary: glusterd crashed when "replace-brick commit force" operation performed
Product: [Community] GlusterFS Reporter: Shwetha Panduranga <shwetha.h.panduranga>
Component: glusterd Assignee: krishnan parthasarathi <kparthas>
Status: CLOSED CURRENTRELEASE QA Contact:
Severity: high Docs Contact:
Priority: unspecified    
Version: mainline CC: gluster-bugs, nsathyan, vbellur
Target Milestone: ---   
Target Release: ---   
Hardware: Unspecified   
OS: Unspecified   
Whiteboard:
Fixed In Version: glusterfs-3.4.0 Doc Type: Bug Fix
Doc Text:
Story Points: ---
Clone Of: Environment:
Last Closed: 2013-07-24 17:19:51 UTC Type: ---
Regression: --- Mount Type: ---
Documentation: --- CRM:
Verified Versions: Category: ---
oVirt Team: --- RHEL 7.3 requirements from Atomic Host:
Cloudforms Team: --- Target Upstream Version:
Embargoed:
Bug Depends On:    
Bug Blocks: 817967    

Description Shwetha Panduranga 2012-03-26 12:14:40 UTC
Description of problem:
(gdb) bt full
#0  0x000000350f232885 in raise () from /lib64/libc.so.6
No symbol table info available.
#1  0x000000350f234065 in abort () from /lib64/libc.so.6
No symbol table info available.
#2  0x000000350f22b9fe in __assert_fail_base () from /lib64/libc.so.6
No symbol table info available.
#3  0x000000350f22bac0 in __assert_fail () from /lib64/libc.so.6
No symbol table info available.
#4  0x00007fa6cc6a7e03 in rb_generate_client_volfile (volinfo=0x230ef60, src_brickinfo=0x2313690) at glusterd-replace-brick.c:763
        priv = 0x23055e0
        this = 0x23014f0
        file = 0x230abe0
        filename = "/etc/glusterd/vols/dstore/rb_client.vol", '\000' <repeats 4056 times>
        ret = -1
        fd = 17
        ttype = 0x0
        __FUNCTION__ = "rb_generate_client_volfile"
        __PRETTY_FUNCTION__ = "rb_generate_client_volfile"
#5  0x00007fa6cc6a870f in rb_spawn_maintenance_client (volinfo=0x230ef60, src_brickinfo=0x2313690) at glusterd-replace-brick.c:977
        ret = -1
        __FUNCTION__ = "rb_spawn_maintenance_client"
#6  0x00007fa6cc6a9021 in rb_do_operation (volinfo=0x230ef60, src_brickinfo=0x2313690, dst_brickinfo=0x23160e0, op=GF_REPLACE_OP_COMMIT)
    at glusterd-replace-brick.c:1179
        ret = -1
        op_str = '\000' <repeats 255 times>
        this = 0x23014f0
        __FUNCTION__ = "rb_do_operation"
#7  0x00007fa6cc6aa011 in glusterd_op_replace_brick (dict=0x22e3b8c, rsp_dict=0x0) at glusterd-replace-brick.c:1501
        ret = 0
        ctx = 0x22e3d30
        replace_op = 6
        volinfo = 0x230ef60
        volname = 0x230a8a0 "dstore"
        this = 0x23014f0
        priv = 0x23055e0
        src_brick = 0x230aa60 "192.168.2.37:/export1/dstore1"
---Type <return> to continue, or q <return> to quit--- 
        dst_brick = 0x230aa10 "192.168.2.35:/export2/dstore2"
        src_brickinfo = 0x2313690
        dst_brickinfo = 0x23160e0
        __PRETTY_FUNCTION__ = "glusterd_op_replace_brick"
        __FUNCTION__ = "glusterd_op_replace_brick"
#8  0x00007fa6cc66a212 in glusterd_op_commit_perform (op=GD_OP_REPLACE_BRICK, dict=0x22e3b8c, op_errstr=0x7fff2f410588, rsp_dict=0x0) at glusterd-op-sm.c:2634
        ret = -1
        __FUNCTION__ = "glusterd_op_commit_perform"
#9  0x00007fa6cc66840d in glusterd_op_ac_send_commit_op (event=0x2308a80, ctx=0x2309210) at glusterd-op-sm.c:1917
        ret = 0
        proc = 0x0
        priv = 0x23055e0
        this = 0x23014f0
        dict = 0x22e3b8c
        op_dict = 0x0
        peerinfo = 0x0
        op_errstr = 0x0
        op = GD_OP_REPLACE_BRICK
        pending_count = 0
        __PRETTY_FUNCTION__ = "glusterd_op_ac_send_commit_op"
        __FUNCTION__ = "glusterd_op_ac_send_commit_op"
#10 0x00007fa6cc66e0c6 in glusterd_op_sm () at glusterd-op-sm.c:4250
        event = 0x2308a80
        tmp = 0x7fa6cc8e7db0
        ret = 0
        lock_err = 0
        handler = 0x7fa6cc668303 <glusterd_op_ac_send_commit_op>
        state = 0x7fa6cc8e5780
        event_type = GD_OP_EVENT_ALL_ACK
        __FUNCTION__ = "glusterd_op_sm"
        __PRETTY_FUNCTION__ = "glusterd_op_sm"
#11 0x00007fa6cc682d5e in glusterd3_1_stage_op_cbk (req=0x23235d8, iov=0x2323618, count=1, myframe=0x7fa6ce9abfe0) at glusterd-rpc-ops.c:918
        rsp = {uuid = "\262\065\342l\a'N\344\204E\002t\373H\213F", op = 10, op_ret = 0, op_errno = 0, op_errstr = 0x22cbe40 "", dict = {dict_len = 4, 
            dict_val = 0x22fd0a0 ""}}
        ret = 0
        op_ret = 0
---Type <return> to continue, or q <return> to quit---
        event_type = GD_OP_EVENT_RCVD_ACC
        peerinfo = 0x2321970
        dict = 0x22e3b38
        err_str = '\000' <repeats 2047 times>
        peer_str = 0x0
        __PRETTY_FUNCTION__ = "glusterd3_1_stage_op_cbk"
        __FUNCTION__ = "glusterd3_1_stage_op_cbk"
#12 0x00007fa6cfb5e9fc in rpc_clnt_handle_reply (clnt=0x2322230, pollin=0x2305150) at rpc-clnt.c:797
        conn = 0x2322260
        saved_frame = 0x232b988
        ret = 0
        req = 0x23235d8
        xid = 4
        __FUNCTION__ = "rpc_clnt_handle_reply"
#13 0x00007fa6cfb5ed99 in rpc_clnt_notify (trans=0x2322580, mydata=0x2322260, event=RPC_TRANSPORT_MSG_RECEIVED, data=0x2305150) at rpc-clnt.c:916
        conn = 0x2322260
        clnt = 0x2322230
        ret = -1
        req_info = 0x0
        pollin = 0x2305150
        tv = {tv_sec = 0, tv_usec = 0}
#14 0x00007fa6cfb5ae7c in rpc_transport_notify (this=0x2322580, event=RPC_TRANSPORT_MSG_RECEIVED, data=0x2305150) at rpc-transport.c:498
        ret = -1
        __FUNCTION__ = "rpc_transport_notify"
#15 0x00007fa6cc3ba270 in socket_event_poll_in (this=0x2322580) at socket.c:1686
        ret = 0
        pollin = 0x2305150
#16 0x00007fa6cc3ba7f4 in socket_event_handler (fd=10, idx=1, data=0x2322580, poll_in=1, poll_out=0, poll_err=0) at socket.c:1801
        this = 0x2322580
        priv = 0x2322970
        ret = 0
        __FUNCTION__ = "socket_event_handler"
#17 0x00007fa6cfdb65e0 in event_dispatch_epoll_handler (event_pool=0x22e33a0, events=0x2307e20, i=0) at event.c:794
        event_data = 0x2307e24
        handler = 0x7fa6cc3ba5d7 <socket_event_handler>
        data = 0x2322580
---Type <return> to continue, or q <return> to quit---
        idx = 1
        ret = -1
        __FUNCTION__ = "event_dispatch_epoll_handler"
#18 0x00007fa6cfdb6803 in event_dispatch_epoll (event_pool=0x22e33a0) at event.c:856
        events = 0x2307e20
        size = 1
        i = 0
        ret = 1
        __FUNCTION__ = "event_dispatch_epoll"
#19 0x00007fa6cfdb6b8e in event_dispatch (event_pool=0x22e33a0) at event.c:956
        ret = -1
        __FUNCTION__ = "event_dispatch"
#20 0x0000000000408057 in main (argc=1, argv=0x7fff2f411428) at glusterfsd.c:1650
        ctx = 0x22cb010
        ret = 0
        __FUNCTION__ = "main"


Version-Release number of selected component (if applicable):
mainline

How reproducible:
often

Steps to Reproduce:
1. Create a replicate volume (1 x 3) and start the volume.
2. Create FUSE and NFS mounts and start write operations from the mount points.
3. Perform "replace-brick <brick3> <new_brick> commit force".
  
Actual results:
glusterd crashes (aborts on an assertion failure in rb_generate_client_volfile; see the backtrace above).

Expected results:


Additional info:
glusterd log:-
---------------
pending frames:

patchset: git://git.gluster.com/glusterfs.git
signal received: 6
time of crash: 2012-03-26 22:49:10
configuration details:
argp 1
backtrace 1
dlfcn 1
fdatasync 1
libpthread 1
llistxattr 1
setfsid 1
spinlock 1
epoll.h 1
xattr.h 1
st_atim.tv_nsec 1
package-string: glusterfs 3git
/lib64/libc.so.6[0x350f232900]
/lib64/libc.so.6(gsignal+0x35)[0x350f232885]
/lib64/libc.so.6(abort+0x175)[0x350f234065]
/lib64/libc.so.6[0x350f22b9fe]
/lib64/libc.so.6(__assert_perror_fail+0x0)[0x350f22bac0]
/usr/local/lib/glusterfs/3git/xlator/mgmt/glusterd.so(+0x7ee03)[0x7f21dc9a1e03]
/usr/local/lib/glusterfs/3git/xlator/mgmt/glusterd.so(+0x7f70f)[0x7f21dc9a270f]
/usr/local/lib/glusterfs/3git/xlator/mgmt/glusterd.so(+0x80021)[0x7f21dc9a3021]
/usr/local/lib/glusterfs/3git/xlator/mgmt/glusterd.so(glusterd_op_replace_brick+0x834)[0x7f21dc9a4011]
/usr/local/lib/glusterfs/3git/xlator/mgmt/glusterd.so(glusterd_op_commit_perform+0xd9)[0x7f21dc964212]
/usr/local/lib/glusterfs/3git/xlator/mgmt/glusterd.so(+0x3f40d)[0x7f21dc96240d]
/usr/local/lib/glusterfs/3git/xlator/mgmt/glusterd.so(glusterd_op_sm+0x1ea)[0x7f21dc9680c6]
/usr/local/lib/glusterfs/3git/xlator/mgmt/glusterd.so(glusterd3_1_stage_op_cbk+0x45f)[0x7f21dc97cd5e]
/usr/local/lib/libgfrpc.so.0(rpc_clnt_handle_reply+0x211)[0x7f21dfe589fc]
/usr/local/lib/libgfrpc.so.0(rpc_clnt_notify+0x2d3)[0x7f21dfe58d99]
/usr/local/lib/libgfrpc.so.0(rpc_transport_notify+0x130)[0x7f21dfe54e7c]
/usr/local/lib/glusterfs/3git/rpc-transport/socket.so(socket_event_poll_in+0x54)[0x7f21dc6b4270]
/usr/local/lib/glusterfs/3git/rpc-transport/socket.so(socket_event_handler+0x21d)[0x7f21dc6b47f4]
/usr/local/lib/libglusterfs.so.0(+0x4e5e0)[0x7f21e00b05e0]
/usr/local/lib/libglusterfs.so.0(+0x4e803)[0x7f21e00b0803]
/usr/local/lib/libglusterfs.so.0(event_dispatch+0x88)[0x7f21e00b0b8e]
glusterd(main+0x238)[0x408057]
/lib64/libc.so.6(__libc_start_main+0xfd)[0x350f21ecdd]
glusterd[0x4040c9]

Comment 1 Vijay Bellur 2012-03-30 06:47:10 UTC
Fixed as part of bug 772845.

Comment 2 Shwetha Panduranga 2012-04-12 06:29:08 UTC
Verified on 3.3.0qa34. Works fine.