Hide Forgot
Thread 1 (process 29962): #0 0x00000034a667a3c0 in memcpy () from /lib64/libc.so.6 No symbol table info available. #1 0x00002ba2434be2e5 in afr_writev_wind_cbk (frame=0x12fc2848, cookie=0x0, this=0xbc233c0, op_ret=131072, op_errno=0, prebuf=0x42f0bd00, postbuf=0x1) postbuf is 0x1 even though op_ret is positive. Some translator below afr has not set postbuf properly, causing the crash.
patchset: v3.0.3-2-g9aed760 signal received: 11 time of crash: 2010-05-05 10:04:07 configuration details: argp 1 backtrace 1 dlfcn 1 fdatasync 1 libpthread 1 llistxattr 1 setfsid 1 spinlock 1 epoll.h 1 xattr.h 1 st_atim.tv_nsec 1 package-string: glusterfs 3.0.4rc1 /lib64/libc.so.6[0x34a66301b0] /lib64/libc.so.6(memcpy+0xa0)[0x34a667a3c0] /usr/lib64/glusterfs/3.0.4rc1/xlator/cluster/afr.so(afr_writev_wind_cbk+0x10b)[0x2ba2434be2e5] /usr/lib64/glusterfs/3.0.4rc1/xlator/protocol/client.so(client_readv_cbk+0x1dc)[0x2ba24328ab71] /usr/lib64/glusterfs/3.0.4rc1/xlator/protocol/client.so(protocol_client_interpret+0x1ce)[0x2ba2432910 59] /usr/lib64/glusterfs/3.0.4rc1/xlator/protocol/client.so(protocol_client_pollin+0xb0)[0x2ba243291d1f] /usr/lib64/glusterfs/3.0.4rc1/xlator/protocol/client.so(notify+0x16c)[0x2ba243292396] /usr/lib64/libglusterfs.so.0(xlator_notify+0xf5)[0x3b19a1875a] /usr/lib64/glusterfs/3.0.4rc1/transport/ib-verbs.so[0x2aaaaab1dd31] /lib64/libpthread.so.0[0x34a72062f7] /lib64/libc.so.6(clone+0x6d)[0x34a66d1e3d] (gdb) thread apply all bt full #0 0x00000034a66d2228 in epoll_wait () from /lib64/libc.so.6 No symbol table info available. #1 0x0000003b19a3d87b in event_dispatch_epoll (event_pool=0xbc1b870) at event.c:850 events = (struct epoll_event *) 0xbc39360 size = 1 i = 1 ret = 0 __FUNCTION__ = "event_dispatch_epoll" #2 0x0000003b19a3dbf0 in event_dispatch (event_pool=0xbc1b870) at event.c:975 ret = -1 __FUNCTION__ = "event_dispatch" #3 0x0000000000405f0b in main (argc=5, argv=0x7fff4cd1db78) at glusterfsd.c:1413 ctx = (glusterfs_ctx_t *) 0xbc1a010 cmd_args = (cmd_args_t *) 0xbc1a010 pool = (call_pool_t *) 0xbc1a360 stbuf = {st_dev = 750006345, st_ino = 191, st_nlink = 140734482209248, st_mode = 191, st_uid = 0, st_gid = 1288821351, pad0 = 32767, st_rdev = 226130125849, st_size = 47975902512216, st_blksize = 140734482209382, st_blocks = 223338299465, st_atim = {tv_sec = 6442450944, tv_nsec = 0}, st_mtim = {tv_sec = 0, tv_nsec = 0}, st_ctim = {tv_sec = 5658608, tv_nsec = 191}, __unused = {5658608, 140734482209382, 140734482209383}} tmp_logfile = '\0' <repeats 1023 times> tmp_logfile_dyn = 0x0 tmp_logfilebase = 0x0 timestr = '\0' <repeats 255 times> utime = 1269957365 tm = (struct tm *) 0x0 ret = 0 lim = {rlim_cur = 18446744073709551615, rlim_max = 18446744073709551615} specfp = (FILE *) 0xbc1bb50 graph = (xlator_t *) 0xbc1bdb0 trav = (xlator_t *) 0x0 fuse_volume_found = 0 xl_count = 11 process_mode = 1 '\001' pipe_fd = {6, 7} gf_success = 0 __FUNCTION__ = "main" Thread 4 (process 29955): #0 0x00000034a6698611 in nanosleep () from /lib64/libc.so.6 No symbol table info available. #1 0x00000034a66cbb24 in usleep () from /lib64/libc.so.6 No symbol table info available. #2 0x0000003b19a27c71 in gf_timer_proc (ctx=0xbc1a010) at timer.c:177 now = 1273068247038857 now_tv = {tv_sec = 1273068247, tv_usec = 38857} event = (gf_timer_t *) 0x2aaac3c6bea0 reg = (gf_timer_registry_t *) 0xbc251a0 __FUNCTION__ = "gf_timer_proc" #3 0x00000034a72062f7 in start_thread () from /lib64/libpthread.so.0 No symbol table info available. #4 0x00000034a66d1e3d in clone () from /lib64/libc.so.6 No symbol table info available. Thread 3 (process 29961): #0 0x00000034a720cc8b in read () from /lib64/libpthread.so.0 No symbol table info available. #1 0x000000304fa09eac in __ibv_get_cq_event (channel=<value optimized out>, cq=0x4250b100, cq_context=0x4250b0f8) at /usr/include/bits/unistd.h:35 ev = {cq_handle = 4096} #2 0x00002aaaaab1def0 in ib_verbs_send_completion_proc (data=0xbc28340) at ib-verbs.c:1240 chan = (struct ibv_comp_channel *) 0xbc28340 post = (ib_verbs_post_t *) 0x2aaac3d43b30 peer = (ib_verbs_peer_t *) 0xbc34018 event_cq = (struct ibv_cq *) 0xbc28500 event_ctx = (void *) 0xbc2b6e0 device = (ib_verbs_device_t *) 0xbc2b6e0 wc = {wr_id = 46912918272816, status = IBV_WC_SUCCESS, opcode = IBV_WC_SEND, vendor_err = 0, byte_len = 0, imm_data = 0, qp_num = 4718671, src_qp = 0, wc_flags = 0, pkey_index = 0, slid = 0, sl = 0 '\0', dlid_path_bits = 0 '\0'} ret = 0 __FUNCTION__ = "ib_verbs_send_completion_proc" #3 0x00000034a72062f7 in start_thread () from /lib64/libpthread.so.0 #4 0x00000034a66d1e3d in clone () from /lib64/libc.so.6 No symbol table info available. Thread 2 (process 29963): #0 0x00000034a66cac43 in readv () from /lib64/libc.so.6 No symbol table info available. #1 0x00002ba243b3e683 in fuse_thread_proc (data=0xbc1bdb0) at fuse-bridge.c:3117 mount_point = 0x0 this = (xlator_t *) 0xbc1bdb0 priv = (fuse_private_t *) 0xbc253a0 res = 80 iobuf = (struct iobuf *) 0x2aaac7287ce0 finh = (fuse_in_header_t *) 0x12ffccb0 iov_in = {{iov_base = 0x12f4dec0, iov_len = 80}, {iov_base = 0x2aaace0d2000, iov_len = 131072}} msg = (void *) 0x12ffccd8 msg0_size = 168 ret = 0 now = {tv_sec = 1269957365, tv_usec = 509917} timeout = {tv_sec = 1269957366, tv_nsec = 509917000} __FUNCTION__ = "fuse_thread_proc" #2 0x00000034a72062f7 in start_thread () from /lib64/libpthread.so.0 No symbol table info available. #3 0x00000034a66d1e3d in clone () from /lib64/libc.so.6 No symbol table info available. Thread 1 (process 29962): #0 0x00000034a667a3c0 in memcpy () from /lib64/libc.so.6 No symbol table info available. #1 0x00002ba2434be2e5 in afr_writev_wind_cbk (frame=0x12fc2848, cookie=0x0, this=0xbc233c0, op_ret=131072, op_errno=0, prebuf=0x42f0bd00, postbuf=0x1) at afr-inode-write.c:115 local = (afr_local_t *) 0x12ff9470 priv = (afr_private_t *) 0xbc259f0 child_index = 0 call_count = -1 need_unwind = 0 _parent = (call_frame_t *) 0x12fc2848 old_THIS = (xlator_t *) 0xbc21990 rsp = (gf_fop_read_rsp_t *) 0x2aaac3d2748c op_ret = 131072 op_errno = 0 vector = {iov_base = 0x2aaace3d2000, iov_len = 131072} stbuf = {st_dev = 41027, st_ino = 108072142, st_nlink = 1, st_mode = 33204, st_uid = 508, st_gid = 508, pad0 = 0, st_rdev = 4294967295, st_size = 64968920, st_blksize = 131072, st_blocks = 126999, st_atim = {tv_sec = 1273068251, tv_nsec = 0}, st_mtim = {tv_sec = 0, tv_nsec = 0}, st_ctim = {tv_sec = 1273068250, tv_nsec = 0}, __unused = {0, 0, 0}} iobref = (struct iobref *) 0x2aaac3ce31c0 #3 0x00002ba243291059 in protocol_client_interpret (this=0xbc21990, trans=0xbc33c90, hdr_p=0x2aaac3d27420 "", hdrlen=188, iobuf=0x2aaac72880a0) at client-protocol.c:6529 ret = -1 frame = (call_frame_t *) 0x12ffc710 hdr = (gf_hdr_common_t *) 0x2aaac3d27420 callid = 4719815 type = 4 op = 11 conn = (client_connection_t *) 0xbc34220 __FUNCTION__ = "protocol_client_interpret" #4 0x00002ba243291d1f in protocol_client_pollin (this=0xbc21990, trans=0xbc33c90) at client-protocol.c:6827 conf = (client_conf_t *) 0xbc2ff60 ret = 0 iobuf = (struct iobuf *) 0x2aaac72880a0 hdr = 0x2aaac3d27420 "" hdrlen = 188 #5 0x00002ba243292396 in notify (this=0xbc21990, event=2, data=0xbc33c90) at client-protocol.c:6946 i = 0 ret = -1 child_down = 1 was_not_down = 0 trans = (transport_t *) 0xbc33c90 conn = (client_connection_t *) 0x0 conf = (client_conf_t *) 0xbc2ff60 ret = 0 iobuf = (struct iobuf *) 0x2aaac72880a0 hdr = 0x2aaac3d27420 "" hdrlen = 188 #5 0x00002ba243292396 in notify (this=0xbc21990, event=2, data=0xbc33c90) at client-protocol.c:6946 i = 0 ret = -1 child_down = 1 was_not_down = 0 trans = (transport_t *) 0xbc33c90 conn = (client_connection_t *) 0x0 ---Type <return> to continue, or q <return> to quit--- conf = (client_conf_t *) 0xbc2ff60 parent = (xlator_list_t *) 0x0 __FUNCTION__ = "notify" #6 0x0000003b19a1875a in xlator_notify (xl=0xbc21990, event=2, data=0xbc33c90) at xlator.c:924 old_THIS = (xlator_t *) 0x3b19c68e40 ret = 0 #7 0x00002aaaaab1dd31 in ib_verbs_recv_completion_proc (data=0xbc283a0) at ib-verbs.c:1192 chan = (struct ibv_comp_channel *) 0xbc283a0 priv = (ib_verbs_private_t *) 0xbc34000 device = (ib_verbs_device_t *) 0xbc2b6e0 post = (ib_verbs_post_t *) 0xbc2f110 peer = (ib_verbs_peer_t *) 0xbc34018 event_cq = (struct ibv_cq *) 0xbc283c0 wc = {wr_id = 197325072, status = IBV_WC_SUCCESS, opcode = IBV_WC_RECV, vendor_err = 0, byte_len = 131272, imm_data = 0, qp_num = 4718671, src_qp = 1722863, wc_flags = 0, pkey_index = 0, slid = 3, sl = 0 '\0', dlid_path_bits = 0 '\0'} event_ctx = (void *) 0xbc2b6e0 ret = 1 __FUNCTION__ = "ib_verbs_recv_completion_proc" #8 0x00000034a72062f7 in start_thread () from /lib64/libpthread.so.0 No symbol table info available. #9 0x00000034a66d1e3d in clone () from /lib64/libc.so.6 No symbol table info available. (gdb)
/usr/lib64/glusterfs/3.0.4rc1/xlator/cluster/afr.so(afr_writev_wind_cbk+0x10b)[0x2ba2434be2e5] /usr/lib64/glusterfs/3.0.4rc1/xlator/protocol/client.so(client_readv_cbk+0x1dc)[0x2ba24328ab71] from the above log it seems that afr_writev_wind_cbk is getting its arguments from client_readv_cbk, and the client_readv_cbk is sending wrong values while unwinding. "STACK_UNWIND(frame, op_ret, op_errno, &vector, 1, &stbuf, iobref); This in afr_writev_wind_cbk postbuf is getting the value of 1 instead of stbuf, thus causing a crash.
This has not been observed often. Moving this to 3.0.6
We have been unable to reproduce / determine the cause of crash. This can be re-opened when it is observed again.