Hide Forgot
PATCH: http://patches.gluster.com/patch/2675 in release-2.0 (protocol/server: use properly filled loc_t for performing revalidates)
(description and backtrace from Gordan Bobic)

Backtrace attached.

Gordan

Anand Avati wrote:
>> [2010-01-06 21:35:54] N [server-protocol.c:7065:mop_setvolume] server-home:
>> accepted client from 10.2.3.1:1023
>> [2010-01-06 21:35:54] N [server-protocol.c:7065:mop_setvolume] server-home:
>> accepted client from 10.2.3.1:1022
>> pending frames:
>> frame : type(1) op(LOOKUP)
>> frame : type(1) op(LK)
>>
>> patchset: v2.0.9
>> signal received: 11
>> time of crash: 2010-01-06 21:36:12
>> configuration details:
>> argp 1
>> backtrace 1
>> db.h 1
>> dlfcn 1
>> fdatasync 1
>> libpthread 1
>> llistxattr 1
>> setfsid 1
>> spinlock 1
>> epoll.h 1
>> xattr.h 1
>> st_atim.tv_nsec 1
>> package-string: glusterfs 2.0.9
>> /lib64/libc.so.6[0x3f55e302d0]
>> /usr/lib64/glusterfs/2.0.9/xlator/protocol/client.so(client_lookup+0xc8)[0x2afea8f89438]
>> /usr/lib64/glusterfs/2.0.9/xlator/cluster/replicate.so(afr_lookup+0x226)[0x2afea97e3e66]
>> /usr/lib64/glusterfs/2.0.9/xlator/protocol/server.so(server_lookup_cbk+0x513)[0x2afea95cb2a3]
>> /usr/lib64/glusterfs/2.0.9/xlator/cluster/replicate.so(afr_self_heal_cbk+0x8e)[0x2afea97e46fe]
>> /usr/lib64/glusterfs/2.0.9/xlator/cluster/replicate.so(afr_sh_data_done+0xbe)[0x2afea97f8bce]
>> /usr/lib64/glusterfs/2.0.9/xlator/cluster/replicate.so(afr_sh_data_flush_cbk+0x44)[0x2afea97fa284]
>> /usr/lib64/glusterfs/2.0.9/xlator/cluster/replicate.so(afr_sh_data_utimes_cbk+0x9)[0x2afea97fa2a9]
>> /usr/lib64/glusterfs/2.0.9/xlator/protocol/client.so(client_utimens_cbk+0x14e)[0x2afea8f9152e]
>> /usr/lib64/glusterfs/2.0.9/xlator/protocol/client.so(protocol_client_pollin+0xca)[0x2afea8f808aa]
>> /usr/lib64/glusterfs/2.0.9/xlator/protocol/client.so(notify+0x212)[0x2afea8f874e2]
>> /usr/lib64/glusterfs/2.0.9/transport/socket.so(socket_event_handler+0xd3)[0x2aaaaaaafe33]
>> /usr/lib64/libglusterfs.so.0[0x3f56a27115]
>> /usr/sbin/glusterfs(main+0xa06)[0x403e96]
>> /lib64/libc.so.6(__libc_start_main+0xf4)[0x3f55e1d994]
>> /usr/sbin/glusterfs[0x402509]
>> ---------
>
> Can you send us a 'thread apply all bt full' from this core? This
> might be a bug which we fixed in 3.0 but have missed applying the
> patch in 2.0 tree. The backtrace can help us here.
>
> Avati

--------------020208020002090606060402
Content-Type: text/plain; name="core.log.1738"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline; filename="core.log.1738"

Core was generated by `/usr/sbin/glusterfs --log-level=NORMAL --disable-direct-io-mode --volfile=/etc/'.
Program terminated with signal 11, Segmentation fault.
[New process 1738]
[New process 1742]
[New process 1739]
#0 0x00002afea8f89438 in client_lookup (frame=0x2aaaac0ca450, this=0xeee8050, loc=0x7fffb067a130, xattr_req=0x2aaaac001b10) at client-protocol.c:3265
3265 if (loc->parent->ino && ret < 0) {

Thread 3 (process 1739):
#0 0x0000003f55e9a0b1 in nanosleep () from /lib64/libc.so.6
No symbol table info available.
#1 0x0000003f55ecd684 in usleep () from /lib64/libc.so.6
No symbol table info available.
#2 0x0000003f56a1a8b4 in gf_timer_proc (ctx=<value optimized out>) at timer.c:177
now = <value optimized out>
now_tv = Could not find the frame base for "gf_timer_proc".
event = <value optimized out>
reg = <value optimized out>
__FUNCTION__ = "gf_timer_proc"
#3 0x0000003f56606617 in start_thread () from /lib64/libpthread.so.0
No symbol table info available.
#4 0x0000003f55ed3c2d in clone () from /lib64/libc.so.6
No symbol table info available.

Thread 2 (process 1742):
#0 0x0000003f5660d73b in read () from /lib64/libpthread.so.0
No symbol table info available.
#1 0x0000003f5820ec9a in ?? () from /usr/lib64/libfuse.so.2
No symbol table info available.
#2 0x0000003f58212650 in fuse_chan_receive () from /usr/lib64/libfuse.so.2
No symbol table info available.
#3 0x00002afea9a16230 in fuse_thread_proc (data=<value optimized out>) at fuse-bridge.c:2541 mount_point = <value optimized out> this = (xlator_t *) 0xeee3dc0 priv = (fuse_private_t *) 0xeef0930 res = 56 iobuf = (struct iobuf *) 0xeee37f8 chan_size = 135168 ret = <value optimized out> now = {tv_sec = 1262774948, tv_usec = 67997} timeout = {tv_sec = 1262774949, tv_nsec = 67997000} __FUNCTION__ = "fuse_thread_proc" #4 0x0000003f56606617 in start_thread () from /lib64/libpthread.so.0 No symbol table info available. #5 0x0000003f55ed3c2d in clone () from /lib64/libc.so.6 No symbol table info available. Thread 1 (process 1738): #0 0x00002afea8f89438 in client_lookup (frame=0x2aaaac0ca450, this=0xeee8050, loc=0x7fffb067a130, xattr_req=0x2aaaac001b10) at client-protocol.c:3265 hdr = <value optimized out> req = <value optimized out> ret = -1 ino = 1 par = 0 dictlen = 0 pathlen = <value optimized out> baselen = 0 local = <value optimized out> buf = 0x0 __FUNCTION__ = "client_lookup" #1 0x00002afea97e3e66 in afr_lookup (frame=0x2aaaac0c8dc0, this=<value optimized out>, loc=0x7fffb067a130, xattr_req=0x2aaaac001b10) at afr.c:693 _new = (call_frame_t *) 0x0 ret = <value optimized out> i = <value optimized out> ctx = 116 __FUNCTION__ = "afr_lookup" #2 0x00002afea95cb2a3 in server_lookup_cbk (frame=0x2aaaac01fc90, cookie=<value optimized out>, this=0xeeeb9d0, op_ret=-1, op_errno=116, inode=0xef99da0, stbuf=0x2aaaac0c76b0, dict=0x2aaaac0c5180) at server-protocol.c:2297 _new = (call_frame_t *) 0x0 hdr = <value optimized out> state = (server_state_t *) 0x2aaaac0fd030 dict_len = <value optimized out> gf_errno = <value optimized out> ret = <value optimized out> loc = {path = 0x2aaaac0abab8 "afr.home2", name = 0x0, ino = 0, inode = 0x2aaaac0bdb40, parent = 0x0} __FUNCTION__ = "server_lookup_cbk" #3 0x00002afea97e46fe in afr_self_heal_cbk (frame=0x2aaaac0f9bb0, this=<value optimized out>) at afr.c:400 fn = (int32_t (*)(call_frame_t *, call_frame_t *, xlator_t *, int32_t, int32_t, ...)) 
0x2afea97e4080 <afr_lookup_cbk> _parent = (call_frame_t *) 0x0 __local = (afr_local_t *) 0x2aaaac0c7560 __this = (xlator_t *) 0xeeeaf40 local = (afr_local_t *) 0x2aaaac0c7560 #4 0x00002afea97f8bce in afr_sh_data_done (frame=0x2aaaac0f9bb0, this=0xeeeaf40) at afr-self-heal-data.c:81 local = (afr_local_t *) 0x2aaaac0c7560 sh = (afr_self_heal_t *) 0x2aaaac0c8ae8 priv = (afr_private_t *) 0xeeef9d0 i = -1335386096 __FUNCTION__ = "afr_sh_data_done" #5 0x00002afea97fa284 in afr_sh_data_flush_cbk (frame=0x2aaaac0f9bb0, cookie=<value optimized out>, this=0xeeeaf40, op_ret=<value optimized out>, op_errno=<value optimized out>) at afr-self-heal-data.c:108 call_count = 0 #6 0x00002afea97fa2a9 in afr_sh_data_utimes_cbk (frame=0x0, cookie=0xeee8050, this=0x7fffb067a010, op_ret=-1409286094, op_errno=-16843009, buf=0x2f2f2f2f2f2f2f2f) at afr-self-heal-data.c:119 No locals. #7 0x00002afea8f9152e in client_utimens_cbk (frame=0x2aaaac0fcac0, hdr=0x2aaaac0fcb10, hdrlen=<value optimized out>, iobuf=<value optimized out>) at client-protocol.c:4162 fn = (int32_t (*)(call_frame_t *, call_frame_t *, xlator_t *, int32_t, int32_t, ...)) 0x2afea97e4080 <afr_lookup_cbk> _parent = (call_frame_t *) 0x0 stbuf = {st_dev = 2306, st_ino = 294208129, st_nlink = 1, st_mode = 33152, st_uid = 1001, st_gid = 1001, pad0 = 0, st_rdev = 0, st_size = 136, st_blksize = 4096, st_blocks = 16, st_atim = {tv_sec = 1262793731, tv_nsec = 0}, st_mtim = { tv_sec = 1262793731, tv_nsec = 0}, st_ctim = {tv_sec = 1262813772, tv_nsec = 0}, __unused = {0, 0, 0}} op_errno = 0 #8 0x00002afea8f808aa in protocol_client_pollin (this=0xeee8d00, trans=0xeeef4c0) at client-protocol.c:6347 conf = (client_conf_t *) 0xeeeeb00 ret = 0 iobuf = (struct iobuf *) 0x0 hdr = 0x2aaaac0fcb10 "" hdrlen = 108 #9 0x00002afea8f874e2 in notify (this=0x0, event=2, data=0xeeef4c0) at client-protocol.c:6390 ret = <value optimized out> child_down = <value optimized out> was_not_down = <value optimized out> trans = (transport_t *) 0xeee8050 conn = 
<value optimized out> conf = (client_conf_t *) 0xeeeeb00 parent = <value optimized out> __FUNCTION__ = "notify" #10 0x00002aaaaaaafe33 in socket_event_handler (fd=<value optimized out>, idx=2, data=0xeeef4c0, poll_in=1, poll_out=0, poll_err=0) at socket.c:814 this = (transport_t *) 0x0 priv = (socket_private_t *) 0xeeef7c0 ret = 0 #11 0x0000003f56a27115 in event_dispatch_epoll (event_pool=<value optimized out>) at event.c:804 events = <value optimized out> i = <value optimized out> ret = <value optimized out> __FUNCTION__ = "event_dispatch_epoll" #12 0x0000000000403e96 in main (argc=5, argv=0x7fffb067b198) at glusterfsd.c:1317 ctx = <value optimized out> cmd_args = <value optimized out> stbuf = {st_dev = 0, st_ino = 140736152970928, st_nlink = 0, st_mode = 1436594605, st_uid = 63, st_gid = 0, pad0 = 0, st_rdev = 1261711580, st_size = 0, st_blksize = 272019541654, st_blocks = 140736152972448, st_atim = {tv_sec = 272019534192, tv_nsec = 140736152972511}, st_mtim = {tv_sec = 140736152972496, tv_nsec = 140736152972488}, st_ctim = {tv_sec = 272021703448, tv_nsec = 1}, __unused = {0, 6323616, 272023768447}} tmp_logfile = '\0' <repeats 1023 times> timestr = '\0' <repeats 255 times> utime = 1262774948 tm = <value optimized out> ret = 0 lim = {rlim_cur = 18446744073709551615, rlim_max = 18446744073709551615} specfp = <value optimized out> graph = (xlator_t *) 0x0 trav = <value optimized out> fuse_volume_found = 0 xl_count = <value optimized out> pipe_fd = {6, 7} gf_success = 0 gf_failure = -1 __FUNCTION__ = "main"