Bug 762186 (GLUSTER-454)

Summary: crashes in callbacks of fops like lookup, readdirp in protocol/client
Product: [Community] GlusterFS Reporter: Raghavendra G <raghavendra>
Component: protocol Assignee: Raghavendra G <raghavendra>
Status: CLOSED WORKSFORME QA Contact:
Severity: low Docs Contact:
Priority: low    
Version: mainline CC: gluster-bugs, shehjart
Target Milestone: ---   
Target Release: ---   
Hardware: All   
OS: Linux   
Whiteboard:
Fixed In Version: Doc Type: Bug Fix
Doc Text:
Story Points: ---
Clone Of: Environment:
Last Closed: Type: ---
Regression: --- Mount Type: ---
Documentation: --- CRM:
Verified Versions: Category: ---
oVirt Team: --- RHEL 7.3 requirements from Atomic Host:
Cloudforms Team: --- Target Upstream Version:
Attachments:
Description Flags
volume spec file
none
glusterfs log file none

Description Raghavendra G 2009-12-08 16:02:26 UTC
Created attachment 117 [details]
sound module loaded at boot-time

The logfile contains many runs; the crash is logged in the second-to-last run of glusterfs.

Comment 1 Raghavendra G 2009-12-08 19:01:01 UTC
I observed at least two crashes in protocol/client, in client_readdirp_cbk and client_lookup_cbk. Both crashes were caused by the frame passed as an argument having already been freed.

My guess is that, in either replicate or distribute, STACK_DESTROY was called on frame->root, either directly or indirectly through STACK_UNWIND.

I've attached the logfiles for the crash in client_lookup_cbk.

Here is the backtrace of the crash in client_lookup_cbk.

#0  0xb7ebee48 in gf_print_trace (signum=11) at ../../../libglusterfs/src/common-utils.c:413
        tmp = (call_frame_t *) 0xb02774cc
        ctx = (glusterfs_ctx_t *) 0x8051008
        trav = (struct list_head *) 0xb0277458
        tm = (struct tm *) 0x0
        msg = "frame : type(1) op(WRITE)\n", '\0' <repeats 997 times>
        timestr = '\0' <repeats 255 times>
        utime = 0
        ret = 26
        fd = 4
#1  <signal handler called>
No symbol table info available.
#2  0xb7e8f276 in pthread_spin_lock () from /lib/libpthread.so.0
No symbol table info available.
#3  0xb7ec5524 in inode_ctx_get2 (inode=0xa9, xlator=0x58a68b7, value1=0xad1ff158, value2=0xad1ff150)
    at ../../../libglusterfs/src/inode.c:1213
        ret = 0
#4  0xb74a3495 in client_lookup_cbk (frame=0xaa0d8287, hdr=0x8309188, hdrlen=504, iobuf=0x0)
    at ../../../../../xlators/protocol/client/src/client-protocol.c:4897
        stbuf = {st_dev = 5412414050074099713, __pad1 = 0, __st_ino = 0, st_mode = 16877, st_nlink = 3, 
  st_uid = 0, st_gid = 0, st_rdev = 0, __pad2 = 0, st_size = 4096, st_blksize = 4096, st_blocks = 16, st_atim = {
    tv_sec = 1260236810, tv_nsec = 0}, st_mtim = {tv_sec = 1260176040, tv_nsec = 0}, st_ctim = {
    tv_sec = 1260236804, tv_nsec = 0}, st_ino = 1}
        postparent = {st_dev = 0, __pad1 = 0, __st_ino = 0, st_mode = 0, st_nlink = 0, st_uid = 0, st_gid = 0, 
  st_rdev = 0, __pad2 = 0, st_size = 0, st_blksize = 0, st_blocks = 0, st_atim = {tv_sec = 0, tv_nsec = 0}, 
  st_mtim = {tv_sec = 0, tv_nsec = 0}, st_ctim = {tv_sec = 0, tv_nsec = 0}, st_ino = 0}
        inode = (inode_t *) 0xa9
        xattr = (dict_t *) 0x0
        rsp = (gf_fop_lookup_rsp_t *) 0x83091f4
        op_ret = -1
        op_errno = 0
        dict_len = 0
        dictbuf = 0x0
        ret = -1
        gf_errno = 0
        local = (client_local_t *) 0xa9d070b7
        oldino = 0
        oldgen = 0
        __FUNCTION__ = "client_lookup_cbk"
#5  0xb74a79d2 in protocol_client_interpret (this=0x8058a68, trans=0x80941b8, hdr_p=0x8309188 "", hdrlen=504, 
    iobuf=0x0) at ../../../../../xlators/protocol/client/src/client-protocol.c:6507
        ret = -1
        frame = (call_frame_t *) 0xaa0d8287
        hdr = (gf_hdr_common_t *) 0x8309188
        callid = 236320
        type = 4
        op = 27
        conn = (client_connection_t *) 0x80944c0
        __FUNCTION__ = "protocol_client_interpret"
#6  0xb74a85ee in protocol_client_pollin (this=0x8058a68, trans=0x80941b8)
    at ../../../../../xlators/protocol/client/src/client-protocol.c:6805
        conf = (client_conf_t *) 0x8093ce0
        ret = 0
        iobuf = (struct iobuf *) 0x0
        hdr = 0x8309188 ""
        hdrlen = 504
#7  0xb74a8abd in notify (this=0x8058a68, event=2, data=0x80941b8)
    at ../../../../../xlators/protocol/client/src/client-protocol.c:6924
        i = 0
        ret = -1
        child_down = 1
        was_not_down = 0
        trans = (transport_t *) 0x80941b8
        conn = (client_connection_t *) 0x0
        conf = (client_conf_t *) 0x8093ce0
        parent = (xlator_list_t *) 0x0
        __FUNCTION__ = "notify"
#8  0xb7eb5720 in xlator_notify (xl=0x8058a68, event=2, data=0x80941b8) at ../../../libglusterfs/src/xlator.c:923
        old_THIS = (xlator_t *) 0xb7ee7ba0
        ret = 0
#9  0xb7ec2230 in transport_peerproc (trans_data=0x80941b8) at ../../../libglusterfs/src/transport.c:414
        trans = (transport_t *) 0x80941b8
        msg = (struct transport_msg *) 0x8174d70
#10 0xb7e8a383 in start_thread () from /lib/libpthread.so.0
No symbol table info available.
#11 0xb7e0f05e in clone () from /lib/libc.so.6
No symbol table info available.
(gdb) p *frame
$1 = {root = 0xab029000, parent = 0x66a58b7, next = 0x66a58aa, prev = 0xa6dac0aa, local = 0x0, this = 0x58a68b7, 
  ret = 0x44dd5408, ref_count = 183, lock = 256, cookie = 0x0, complete = _gf_false}
(gdb) p *frame->this
Cannot access memory at address 0x58a68b7


And here is the backtrace of the crash in client_readdirp_cbk.

#0  0xb7553208 in client_readdirp_cbk (frame=0xa89072a7, hdr=0xa89c01e0, hdrlen=1316, iobuf=0x0)
    at ../../../../../xlators/protocol/client/src/client-protocol.c:4461
        fn = (ret_fn_t) 0x5093b008
        _parent = (call_frame_t *) 0x994668a8
        old_THIS = (xlator_t *) 0x0
        rsp = (gf_fop_readdirp_rsp_t *) 0xa89c024c
        op_ret = 11
        op_errno = 0
        buf_size = 1204
        entries = {{list = {next = 0xa8702268, prev = 0xa8715bc0}, {next = 0xa8702268, prev = 0xa8715bc0}}, 
  d_ino = 12773734187154396736, d_off = 7486, d_len = 2828271320, d_type = 3075854596, d_stat = {
    st_dev = 12147639625727274734, __pad1 = 22120, __st_ino = 3086215132, st_mode = 2974114408, 
    st_nlink = 3075854656, st_uid = 1, st_gid = 2974116752, st_rdev = 13210695157743444568, __pad2 = 0, 
    st_size = -5673020022532510832, st_blksize = -1219209821, st_blocks = 134829584, st_atim = {tv_sec = 0, 
      tv_nsec = -1219209931}, st_mtim = {tv_sec = 7486, tv_nsec = 0}, st_ctim = {tv_sec = 0, tv_nsec = 4}, 
    st_ino = 0}, d_name = 0xb1456248 "L\002\234¨\v"}
#1  0xb75589d2 in protocol_client_interpret (this=0x80580f8, trans=0x8095308, hdr_p=0xa89c01e0 "", hdrlen=1316, 
    iobuf=0x0) at ../../../../../xlators/protocol/client/src/client-protocol.c:6507
        ret = -1
        frame = (call_frame_t *) 0xa89072a7
        hdr = (gf_hdr_common_t *) 0xa89c01e0
        callid = 7486
        type = 4
        op = 44
        conn = (client_connection_t *) 0x8095610
        __FUNCTION__ = "protocol_client_interpret"
#2  0xb75595ee in protocol_client_pollin (this=0x80580f8, trans=0x8095308)
    at ../../../../../xlators/protocol/client/src/client-protocol.c:6805
        conf = (client_conf_t *) 0x8094e30
        ret = 0
        iobuf = (struct iobuf *) 0x0
        hdr = 0xa89c01e0 ""
        hdrlen = 1316
#3  0xb7559abd in notify (this=0x80580f8, event=2, data=0x8095308)
    at ../../../../../xlators/protocol/client/src/client-protocol.c:6924
        i = 0
        ret = -1
        child_down = 1
        was_not_down = 0
        trans = (transport_t *) 0x8095308
        conn = (client_connection_t *) 0x0
        conf = (client_conf_t *) 0x8094e30
        parent = (xlator_list_t *) 0x0
        __FUNCTION__ = "notify"
#4  0xb7f66720 in xlator_notify (xl=0x80580f8, event=2, data=0x8095308) at ../../../libglusterfs/src/xlator.c:923
        old_THIS = (xlator_t *) 0xb7f98ba0
        ret = 0
#5  0xb7f73230 in transport_peerproc (trans_data=0x8095308) at ../../../libglusterfs/src/transport.c:414
        trans = (transport_t *) 0x8095308
        msg = (struct transport_msg *) 0xa8989920
#6  0xb7f3b383 in start_thread () from /lib/libpthread.so.0
No symbol table info available.
#7  0xb7ec005e in clone () from /lib/libc.so.6
No symbol table info available.

(gdb) p *frame
$1 = {root = 0x99018000, parent = 0x994668a8, next = 0x994668a8, prev = 0x9901f4a8, local = 0xa8, 
  this = 0x580f800, ret = 0x5093b008, ref_count = 183, lock = 256, cookie = 0x0, complete = _gf_false}
(gdb) p *frame->this
Cannot access memory at address 0x580f800


I was running,
 while true; do dbench -t 100 10; done

on the mount point.

Comment 2 Raghavendra G 2010-02-11 07:11:03 UTC
This bug is no longer reproducible with the latest git pull. Please reopen if the bug is found again.