Hide Forgot
Created attachment 117 [details] sound module loaded at boot-time The logfile contains many runs; the crash is logged in the second-to-last run of glusterfs.
I observed at least two crashes in protocol/client, in client_readdirp_cbk and client_lookup_cbk. Both crashes were caused by the frame passed as an argument having already been freed. My guess is that, in either replicate or distribute, STACK_DESTROY was called on frame->root, either directly or indirectly through STACK_UNWIND. I've attached the logfiles for the crash in client_lookup_cbk. Here is the backtrace of the crash in client_lookup_cbk. #0 0xb7ebee48 in gf_print_trace (signum=11) at ../../../libglusterfs/src/common-utils.c:413 tmp = (call_frame_t *) 0xb02774cc ctx = (glusterfs_ctx_t *) 0x8051008 trav = (struct list_head *) 0xb0277458 tm = (struct tm *) 0x0 msg = "frame : type(1) op(WRITE)\n", '\0' <repeats 997 times> timestr = '\0' <repeats 255 times> utime = 0 ret = 26 fd = 4 #1 <signal handler called> No symbol table info available. #2 0xb7e8f276 in pthread_spin_lock () from /lib/libpthread.so.0 No symbol table info available. #3 0xb7ec5524 in inode_ctx_get2 (inode=0xa9, xlator=0x58a68b7, value1=0xad1ff158, value2=0xad1ff150) at ../../../libglusterfs/src/inode.c:1213 ret = 0 #4 0xb74a3495 in client_lookup_cbk (frame=0xaa0d8287, hdr=0x8309188, hdrlen=504, iobuf=0x0) at ../../../../../xlators/protocol/client/src/client-protocol.c:4897 stbuf = {st_dev = 5412414050074099713, __pad1 = 0, __st_ino = 0, st_mode = 16877, st_nlink = 3, st_uid = 0, st_gid = 0, st_rdev = 0, __pad2 = 0, st_size = 4096, st_blksize = 4096, st_blocks = 16, st_atim = { tv_sec = 1260236810, tv_nsec = 0}, st_mtim = {tv_sec = 1260176040, tv_nsec = 0}, st_ctim = { tv_sec = 1260236804, tv_nsec = 0}, st_ino = 1} postparent = {st_dev = 0, __pad1 = 0, __st_ino = 0, st_mode = 0, st_nlink = 0, st_uid = 0, st_gid = 0, st_rdev = 0, __pad2 = 0, st_size = 0, st_blksize = 0, st_blocks = 0, st_atim = {tv_sec = 0, tv_nsec = 0}, st_mtim = {tv_sec = 0, tv_nsec = 0}, st_ctim = {tv_sec = 0, tv_nsec = 0}, st_ino = 0} inode = (inode_t *) 0xa9 xattr = (dict_t *) 0x0 rsp = (gf_fop_lookup_rsp_t *) 0x83091f4 op_ret = -1 
op_errno = 0 dict_len = 0 dictbuf = 0x0 ret = -1 gf_errno = 0 local = (client_local_t *) 0xa9d070b7 oldino = 0 oldgen = 0 __FUNCTION__ = "client_lookup_cbk" #5 0xb74a79d2 in protocol_client_interpret (this=0x8058a68, trans=0x80941b8, hdr_p=0x8309188 "", hdrlen=504, iobuf=0x0) at ../../../../../xlators/protocol/client/src/client-protocol.c:6507 ret = -1 frame = (call_frame_t *) 0xaa0d8287 hdr = (gf_hdr_common_t *) 0x8309188 callid = 236320 type = 4 op = 27 conn = (client_connection_t *) 0x80944c0 __FUNCTION__ = "protocol_client_interpret" #6 0xb74a85ee in protocol_client_pollin (this=0x8058a68, trans=0x80941b8) at ../../../../../xlators/protocol/client/src/client-protocol.c:6805 conf = (client_conf_t *) 0x8093ce0 ret = 0 iobuf = (struct iobuf *) 0x0 hdr = 0x8309188 "" hdrlen = 504 #7 0xb74a8abd in notify (this=0x8058a68, event=2, data=0x80941b8) at ../../../../../xlators/protocol/client/src/client-protocol.c:6924 i = 0 ret = -1 child_down = 1 was_not_down = 0 trans = (transport_t *) 0x80941b8 conn = (client_connection_t *) 0x0 conf = (client_conf_t *) 0x8093ce0 parent = (xlator_list_t *) 0x0 __FUNCTION__ = "notify" #8 0xb7eb5720 in xlator_notify (xl=0x8058a68, event=2, data=0x80941b8) at ../../../libglusterfs/src/xlator.c:923 old_THIS = (xlator_t *) 0xb7ee7ba0 ret = 0 #9 0xb7ec2230 in transport_peerproc (trans_data=0x80941b8) at ../../../libglusterfs/src/transport.c:414 trans = (transport_t *) 0x80941b8 msg = (struct transport_msg *) 0x8174d70 #10 0xb7e8a383 in start_thread () from /lib/libpthread.so.0 No symbol table info available. #11 0xb7e0f05e in clone () from /lib/libc.so.6 No symbol table info available. (gdb) p *frame $1 = {root = 0xab029000, parent = 0x66a58b7, next = 0x66a58aa, prev = 0xa6dac0aa, local = 0x0, this = 0x58a68b7, ret = 0x44dd5408, ref_count = 183, lock = 256, cookie = 0x0, complete = _gf_false} (gdb) p *frame->this Cannot access memory at address 0x58a68b7 And here is the backtrace of crash in client_readdirp_cbk. 
#0 0xb7553208 in client_readdirp_cbk (frame=0xa89072a7, hdr=0xa89c01e0, hdrlen=1316, iobuf=0x0) at ../../../../../xlators/protocol/client/src/client-protocol.c:4461 fn = (ret_fn_t) 0x5093b008 _parent = (call_frame_t *) 0x994668a8 old_THIS = (xlator_t *) 0x0 rsp = (gf_fop_readdirp_rsp_t *) 0xa89c024c op_ret = 11 op_errno = 0 buf_size = 1204 entries = {{list = {next = 0xa8702268, prev = 0xa8715bc0}, {next = 0xa8702268, prev = 0xa8715bc0}}, d_ino = 12773734187154396736, d_off = 7486, d_len = 2828271320, d_type = 3075854596, d_stat = { st_dev = 12147639625727274734, __pad1 = 22120, __st_ino = 3086215132, st_mode = 2974114408, st_nlink = 3075854656, st_uid = 1, st_gid = 2974116752, st_rdev = 13210695157743444568, __pad2 = 0, st_size = -5673020022532510832, st_blksize = -1219209821, st_blocks = 134829584, st_atim = {tv_sec = 0, tv_nsec = -1219209931}, st_mtim = {tv_sec = 7486, tv_nsec = 0}, st_ctim = {tv_sec = 0, tv_nsec = 4}, st_ino = 0}, d_name = 0xb1456248 "L\002\234ยจ\v"} #1 0xb75589d2 in protocol_client_interpret (this=0x80580f8, trans=0x8095308, hdr_p=0xa89c01e0 "", hdrlen=1316, iobuf=0x0) at ../../../../../xlators/protocol/client/src/client-protocol.c:6507 ret = -1 frame = (call_frame_t *) 0xa89072a7 hdr = (gf_hdr_common_t *) 0xa89c01e0 callid = 7486 type = 4 op = 44 conn = (client_connection_t *) 0x8095610 __FUNCTION__ = "protocol_client_interpret" #2 0xb75595ee in protocol_client_pollin (this=0x80580f8, trans=0x8095308) at ../../../../../xlators/protocol/client/src/client-protocol.c:6805 conf = (client_conf_t *) 0x8094e30 ret = 0 iobuf = (struct iobuf *) 0x0 hdr = 0xa89c01e0 "" hdrlen = 1316 #3 0xb7559abd in notify (this=0x80580f8, event=2, data=0x8095308) at ../../../../../xlators/protocol/client/src/client-protocol.c:6924 i = 0 ret = -1 child_down = 1 was_not_down = 0 trans = (transport_t *) 0x8095308 conn = (client_connection_t *) 0x0 conf = (client_conf_t *) 0x8094e30 parent = (xlator_list_t *) 0x0 __FUNCTION__ = "notify" #4 0xb7f66720 in xlator_notify 
(xl=0x80580f8, event=2, data=0x8095308) at ../../../libglusterfs/src/xlator.c:923 old_THIS = (xlator_t *) 0xb7f98ba0 ret = 0 #5 0xb7f73230 in transport_peerproc (trans_data=0x8095308) at ../../../libglusterfs/src/transport.c:414 trans = (transport_t *) 0x8095308 msg = (struct transport_msg *) 0xa8989920 #6 0xb7f3b383 in start_thread () from /lib/libpthread.so.0 No symbol table info available. #7 0xb7ec005e in clone () from /lib/libc.so.6 No symbol table info available. (gdb) p *frame $1 = {root = 0x99018000, parent = 0x994668a8, next = 0x994668a8, prev = 0x9901f4a8, local = 0xa8, this = 0x580f800, ret = 0x5093b008, ref_count = 183, lock = 256, cookie = 0x0, complete = _gf_false} (gdb) p *frame->this Cannot access memory at address 0x580f800 When the crashes occurred, I was running the following loop on the mount point: `while true; do dbench -t 100 10; done`
This bug is no longer reproducible on the latest git pull. Please reopen it if the bug is found again.