Bug 762186 (GLUSTER-454) - crashes in callbacks of fops like lookup, readdirp in protocol/client
Summary: crashes in callbacks of fops like lookup, readdirp in protocol/client
Keywords:
Status: CLOSED WORKSFORME
Alias: GLUSTER-454
Product: GlusterFS
Classification: Community
Component: protocol
Version: mainline
Hardware: All
OS: Linux
Priority: low
Severity: low
Target Milestone: ---
Assignee: Raghavendra G
QA Contact:
URL:
Whiteboard:
Depends On:
Blocks:
TreeView+ depends on / blocked
 
Reported: 2009-12-08 19:01 UTC by Raghavendra G
Modified: 2015-12-01 16:45 UTC (History)
2 users (show)

Fixed In Version:
Doc Type: Bug Fix
Doc Text:
Clone Of:
Environment:
Last Closed:
Regression: ---
Mount Type: ---
Documentation: ---
CRM:
Verified Versions:


Attachments (Terms of Use)
volume spec file (2.40 KB, application/octet-stream)
2009-12-08 16:01 UTC, Raghavendra G
no flags Details
glusterfs log file (149.55 KB, application/octet-stream)
2009-12-08 16:02 UTC, Raghavendra G
no flags Details

Description Raghavendra G 2009-12-08 16:02:26 UTC
Created attachment 117 [details]
sound module loaded at boot-time

The logfile contains many runs and the crash is logged in the last but one run of glusterfs.

Comment 1 Raghavendra G 2009-12-08 19:01:01 UTC
I observed at least two crashes in protocol/client, in client_readdirp_cbk and client_lookup_cbk. Both of these crashes were due to the frame passed as an argument having already been freed.

My guess is that, in either replicate or distribute, STACK_DESTROY was called on frame->root, either directly or indirectly through STACK_UNWIND.

I've attached the logfiles for crash in client_lookup_cbk.

Here is the backtrace of crash in client_lookup_cbk.

#0  0xb7ebee48 in gf_print_trace (signum=11) at ../../../libglusterfs/src/common-utils.c:413
        tmp = (call_frame_t *) 0xb02774cc
        ctx = (glusterfs_ctx_t *) 0x8051008
        trav = (struct list_head *) 0xb0277458
        tm = (struct tm *) 0x0
        msg = "frame : type(1) op(WRITE)\n", '\0' <repeats 997 times>
        timestr = '\0' <repeats 255 times>
        utime = 0
        ret = 26
        fd = 4
#1  <signal handler called>
No symbol table info available.
#2  0xb7e8f276 in pthread_spin_lock () from /lib/libpthread.so.0
No symbol table info available.
#3  0xb7ec5524 in inode_ctx_get2 (inode=0xa9, xlator=0x58a68b7, value1=0xad1ff158, value2=0xad1ff150)
    at ../../../libglusterfs/src/inode.c:1213
        ret = 0
#4  0xb74a3495 in client_lookup_cbk (frame=0xaa0d8287, hdr=0x8309188, hdrlen=504, iobuf=0x0)
    at ../../../../../xlators/protocol/client/src/client-protocol.c:4897
        stbuf = {st_dev = 5412414050074099713, __pad1 = 0, __st_ino = 0, st_mode = 16877, st_nlink = 3, 
  st_uid = 0, st_gid = 0, st_rdev = 0, __pad2 = 0, st_size = 4096, st_blksize = 4096, st_blocks = 16, st_atim = {
    tv_sec = 1260236810, tv_nsec = 0}, st_mtim = {tv_sec = 1260176040, tv_nsec = 0}, st_ctim = {
    tv_sec = 1260236804, tv_nsec = 0}, st_ino = 1}
        postparent = {st_dev = 0, __pad1 = 0, __st_ino = 0, st_mode = 0, st_nlink = 0, st_uid = 0, st_gid = 0, 
  st_rdev = 0, __pad2 = 0, st_size = 0, st_blksize = 0, st_blocks = 0, st_atim = {tv_sec = 0, tv_nsec = 0}, 
  st_mtim = {tv_sec = 0, tv_nsec = 0}, st_ctim = {tv_sec = 0, tv_nsec = 0}, st_ino = 0}
        inode = (inode_t *) 0xa9
        xattr = (dict_t *) 0x0
        rsp = (gf_fop_lookup_rsp_t *) 0x83091f4
        op_ret = -1
        op_errno = 0
        dict_len = 0
        dictbuf = 0x0
        ret = -1
        gf_errno = 0
        local = (client_local_t *) 0xa9d070b7
        oldino = 0
        oldgen = 0
        __FUNCTION__ = "client_lookup_cbk"
#5  0xb74a79d2 in protocol_client_interpret (this=0x8058a68, trans=0x80941b8, hdr_p=0x8309188 "", hdrlen=504, 
    iobuf=0x0) at ../../../../../xlators/protocol/client/src/client-protocol.c:6507
        ret = -1
        frame = (call_frame_t *) 0xaa0d8287
        hdr = (gf_hdr_common_t *) 0x8309188
        callid = 236320
        type = 4
        op = 27
        conn = (client_connection_t *) 0x80944c0
        __FUNCTION__ = "protocol_client_interpret"
#6  0xb74a85ee in protocol_client_pollin (this=0x8058a68, trans=0x80941b8)
    at ../../../../../xlators/protocol/client/src/client-protocol.c:6805
        conf = (client_conf_t *) 0x8093ce0
        ret = 0
        iobuf = (struct iobuf *) 0x0
        hdr = 0x8309188 ""
        hdrlen = 504
#7  0xb74a8abd in notify (this=0x8058a68, event=2, data=0x80941b8)
    at ../../../../../xlators/protocol/client/src/client-protocol.c:6924
        i = 0
        ret = -1
        child_down = 1
        was_not_down = 0
        trans = (transport_t *) 0x80941b8
        conn = (client_connection_t *) 0x0
        conf = (client_conf_t *) 0x8093ce0
        parent = (xlator_list_t *) 0x0
        __FUNCTION__ = "notify"
#8  0xb7eb5720 in xlator_notify (xl=0x8058a68, event=2, data=0x80941b8) at ../../../libglusterfs/src/xlator.c:923
        old_THIS = (xlator_t *) 0xb7ee7ba0
        ret = 0
#9  0xb7ec2230 in transport_peerproc (trans_data=0x80941b8) at ../../../libglusterfs/src/transport.c:414
        trans = (transport_t *) 0x80941b8
        msg = (struct transport_msg *) 0x8174d70
#10 0xb7e8a383 in start_thread () from /lib/libpthread.so.0
No symbol table info available.
#11 0xb7e0f05e in clone () from /lib/libc.so.6
No symbol table info available.
(gdb) p *frame
$1 = {root = 0xab029000, parent = 0x66a58b7, next = 0x66a58aa, prev = 0xa6dac0aa, local = 0x0, this = 0x58a68b7, 
  ret = 0x44dd5408, ref_count = 183, lock = 256, cookie = 0x0, complete = _gf_false}
(gdb) p *frame->this
Cannot access memory at address 0x58a68b7


And here is the backtrace of crash in client_readdirp_cbk.

#0  0xb7553208 in client_readdirp_cbk (frame=0xa89072a7, hdr=0xa89c01e0, hdrlen=1316, iobuf=0x0)
    at ../../../../../xlators/protocol/client/src/client-protocol.c:4461
        fn = (ret_fn_t) 0x5093b008
        _parent = (call_frame_t *) 0x994668a8
        old_THIS = (xlator_t *) 0x0
        rsp = (gf_fop_readdirp_rsp_t *) 0xa89c024c
        op_ret = 11
        op_errno = 0
        buf_size = 1204
        entries = {{list = {next = 0xa8702268, prev = 0xa8715bc0}, {next = 0xa8702268, prev = 0xa8715bc0}}, 
  d_ino = 12773734187154396736, d_off = 7486, d_len = 2828271320, d_type = 3075854596, d_stat = {
    st_dev = 12147639625727274734, __pad1 = 22120, __st_ino = 3086215132, st_mode = 2974114408, 
    st_nlink = 3075854656, st_uid = 1, st_gid = 2974116752, st_rdev = 13210695157743444568, __pad2 = 0, 
    st_size = -5673020022532510832, st_blksize = -1219209821, st_blocks = 134829584, st_atim = {tv_sec = 0, 
      tv_nsec = -1219209931}, st_mtim = {tv_sec = 7486, tv_nsec = 0}, st_ctim = {tv_sec = 0, tv_nsec = 4}, 
    st_ino = 0}, d_name = 0xb1456248 "L\002\234¨\v"}
#1  0xb75589d2 in protocol_client_interpret (this=0x80580f8, trans=0x8095308, hdr_p=0xa89c01e0 "", hdrlen=1316, 
    iobuf=0x0) at ../../../../../xlators/protocol/client/src/client-protocol.c:6507
        ret = -1
        frame = (call_frame_t *) 0xa89072a7
        hdr = (gf_hdr_common_t *) 0xa89c01e0
        callid = 7486
        type = 4
        op = 44
        conn = (client_connection_t *) 0x8095610
        __FUNCTION__ = "protocol_client_interpret"
#2  0xb75595ee in protocol_client_pollin (this=0x80580f8, trans=0x8095308)
    at ../../../../../xlators/protocol/client/src/client-protocol.c:6805
        conf = (client_conf_t *) 0x8094e30
        ret = 0
        iobuf = (struct iobuf *) 0x0
        hdr = 0xa89c01e0 ""
        hdrlen = 1316
#3  0xb7559abd in notify (this=0x80580f8, event=2, data=0x8095308)
    at ../../../../../xlators/protocol/client/src/client-protocol.c:6924
        i = 0
        ret = -1
        child_down = 1
        was_not_down = 0
        trans = (transport_t *) 0x8095308
        conn = (client_connection_t *) 0x0
        conf = (client_conf_t *) 0x8094e30
        parent = (xlator_list_t *) 0x0
        __FUNCTION__ = "notify"
#4  0xb7f66720 in xlator_notify (xl=0x80580f8, event=2, data=0x8095308) at ../../../libglusterfs/src/xlator.c:923
        old_THIS = (xlator_t *) 0xb7f98ba0
        ret = 0
#5  0xb7f73230 in transport_peerproc (trans_data=0x8095308) at ../../../libglusterfs/src/transport.c:414
        trans = (transport_t *) 0x8095308
        msg = (struct transport_msg *) 0xa8989920
#6  0xb7f3b383 in start_thread () from /lib/libpthread.so.0
No symbol table info available.
#7  0xb7ec005e in clone () from /lib/libc.so.6
No symbol table info available.

(gdb) p *frame
$1 = {root = 0x99018000, parent = 0x994668a8, next = 0x994668a8, prev = 0x9901f4a8, local = 0xa8, 
  this = 0x580f800, ret = 0x5093b008, ref_count = 183, lock = 256, cookie = 0x0, complete = _gf_false}
(gdb) p *frame->this
Cannot access memory at address 0x580f800


I was running,
 while true; do dbench -t 100 10; done

on the mount point.

Comment 2 Raghavendra G 2010-02-11 07:11:03 UTC
This bug is no longer reproducible on the latest git pull. Please reopen if the bug is found again.


Note You need to log in before you can comment on or make changes to this bug.