Description of problem: While running tests/basic/volume-locks.t, after a node is brought down during two simultaneous volume sets, and is bought back up, the subsequent volume stop operation encounters a crash with the following backtrace: (gdb) bt #0 0x000000332a835c55 in __GI_raise (sig=sig@entry=6) at ../nptl/sysdeps/unix/sysv/linux/raise.c:63 #1 0x000000332a837408 in __GI_abort () at abort.c:90 #2 0x000000332a87564b in __libc_message (do_abort=do_abort@entry=2, fmt=fmt@entry=0x332a979a08 "*** glibc detected *** %s: %s: 0x%s ***\n") at ../sysdeps/unix/sysv/linux/libc_fatal.c:197 #3 0x000000332a87cb3e in malloc_printerr (ptr=0x1494f00, str=0x332a979af0 "free(): corrupted unsorted chunks", action=3) at malloc.c:4969 #4 _int_free (av=0x332abb1740 <main_arena>, p=0x1494ef0, have_lock=0) at malloc.c:3826 #5 0x000000332a86b1bd in _IO_new_fclose (fp=0x1494f00) at iofclose.c:85 #6 0x00007fcfd178753a in gf_store_save_value (fd=fd@entry=17, key=key@entry=0x7fcfce363c69 "state", value=value@entry=0x7fff8f7e8ef0 "3") at store.c:346 #7 0x00007fcfce325936 in glusterd_store_peer_write (fd=fd@entry=17, peerinfo=peerinfo@entry=0x14c5870) at glusterd-store.c:2443 #8 0x00007fcfce3259a0 in glusterd_store_perform_peer_store (peerinfo=peerinfo@entry=0x14c5870) at glusterd-store.c:2471 #9 0x00007fcfce325ade in glusterd_store_peerinfo (peerinfo=peerinfo@entry=0x14c5870) at glusterd-store.c:2500 #10 0x00007fcfce2f2636 in glusterd_friend_sm () at glusterd-sm.c:1068 #11 0x00007fcfce328a18 in __glusterd_mgmt_hndsk_version_ack_cbk (req=req@entry=0x14d845c, iov=iov@entry=0x14d849c, count=count@entry=1, myframe=myframe@entry=0x7fcfd03b6694) at glusterd-handshake.c:830 #12 0x00007fcfce31fb40 in glusterd_big_locked_cbk (req=0x14d845c, iov=0x14d849c, count=1, myframe=0x7fcfd03b6694, fn=0x7fcfce3288e0 <__glusterd_mgmt_hndsk_version_ack_cbk>) at glusterd-rpc-ops.c:205 #13 0x00007fcfd15333a4 in rpc_clnt_handle_reply (clnt=clnt@entry=0x14d8270, pollin=0x14b74c0) at rpc-clnt.c:773 #14 
0x00007fcfd153372d in rpc_clnt_notify (trans=<optimized out>, mydata=0x14d82a0, event=<optimized out>, data=<optimized out>) at rpc-clnt.c:901 #15 0x00007fcfd152fc13 in rpc_transport_notify (this=this@entry=0x14e1890, event=event@entry=RPC_TRANSPORT_MSG_RECEIVED, data=data@entry=0x14b74c0) at rpc-transport.c:512 #16 0x00007fcfce04a0b4 in socket_event_poll_in (this=this@entry=0x14e1890) at socket.c:2119 #17 0x00007fcfce04c8ac in socket_event_handler (fd=<optimized out>, idx=<optimized out>, data=0x14e1890, poll_in=1, poll_out=0, poll_err=0) at socket.c:2232 #18 0x00007fcfd17a707a in event_dispatch_epoll_handler (i=<optimized out>, events=0x14b5d60, event_pool=0x1493ee0) at event-epoll.c:384 #19 event_dispatch_epoll (event_pool=0x1493ee0) at event-epoll.c:445 #20 0x0000000000404b80 in main (argc=9, argv=0x7fff8f7ea958) at glusterfsd.c:1967 (gdb) f 6 #6 0x00007fcfd178753a in gf_store_save_value (fd=fd@entry=17, key=key@entry=0x7fcfce363c69 "state", value=value@entry=0x7fff8f7e8ef0 "3") at store.c:346 346 fclose (fp); (gdb) p *fp $1 = {_flags = 0, _IO_read_ptr = 0x0, _IO_read_end = 0x0, _IO_read_base = 0x0, _IO_write_base = 0x0, _IO_write_ptr = 0x0, _IO_write_end = 0x0, _IO_buf_base = 0x0, _IO_buf_end = 0x0, _IO_save_base = 0x0, _IO_backup_base = 0x0, _IO_save_end = 0x0, _markers = 0x0, _chain = 0x149ff70, _fileno = -1, _flags2 = 0, _old_offset = 8390328346775352417, _cur_column = 0, _vtable_offset = 110 'n', _shortbuf = " ", _lock = 0x1494fe0, _offset = -1, __pad1 = 0x79726f7463657269, __pad2 = 0x1494ff0, __pad3 = 0x0, __pad4 = 0x206472657473756c, __pad5 = 8245937404519132461, _mode = -1, _unused2 = "ion management.trans"} (gdb) f 9 #9 0x00007fcfce325ade in glusterd_store_peerinfo (peerinfo=peerinfo@entry=0x14c5870) at glusterd-store.c:2500 2500 ret = glusterd_store_perform_peer_store (peerinfo); (gdb) p peerinfo $2 = (glusterd_peerinfo_t *) 0x14c5870 (gdb) p *$2 $3 = {uuid = "G\025\315\000q\306K~\270\026\233\030\n-\220\063", uuid_str = '\000' <repeats 49 times>, 
state = {state = GD_FRIEND_STATE_BEFRIENDED, transition_time = {tv_sec = 0, tv_usec = 0}}, hostname = 0x14a69f0 "127.1.1.2", port = 0, uuid_list = {next = 0x14b3cb0, prev = 0x14bd9f0}, op_peers_list = {next = 0x0, prev = 0x0}, rpc = 0x14d8270, mgmt = 0x7fcfce591a80 <gd_mgmt_prog>, peer = 0x7fcfce591a00 <gd_peer_prog>, mgmt_v3 = 0x7fcfce5919c0 <gd_mgmt_v3_prog>, connected = 1, shandle = 0x14be490, sm_log = {transitions = 0x14c5980, current = 0, size = 50, count = 1, state_name_get = 0x7fcfce2f1720 <glusterd_friend_sm_state_name_get>, event_name_get = 0x7fcfce2f1740 <glusterd_friend_sm_event_name_get>}, quorum_action = _gf_true, quorum_contrib = QUORUM_WAITING, locked = _gf_false} (gdb) info threads Id Target Id Frame 6 Thread 0x7fcfcfd9a700 (LWP 22863) 0x000000332a8e580d in write () at ../sysdeps/unix/syscall-template.S:81 5 Thread 0x7fcfcda84700 (LWP 23165) 0x000000332b00e86d in nanosleep () at ../sysdeps/unix/syscall-template.S:81 4 Thread 0x7fcfcd07c700 (LWP 23170) pthread_cond_wait@@GLIBC_2.3.2 () at ../nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S:165 3 Thread 0x7fcfced98700 (LWP 22865) pthread_cond_timedwait@@GLIBC_2.3.2 () at ../nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S:217 2 Thread 0x7fcfcf599700 (LWP 22864) pthread_cond_timedwait@@GLIBC_2.3.2 () at ../nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S:217 * 1 Thread 0x7fcfd12f0740 (LWP 22862) 0x000000332a835c55 in __GI_raise (sig=sig@entry=6) at ../nptl/sysdeps/unix/sysv/linux/raise.c:63 (gdb) t 6 [Switching to thread 6 (Thread 0x7fcfcfd9a700 (LWP 22863))] #0 0x000000332a8e580d in write () at ../sysdeps/unix/syscall-template.S:81 81 T_PSEUDO (SYSCALL_SYMBOL, SYSCALL_NAME, SYSCALL_NARGS) (gdb) bt #0 0x000000332a8e580d in write () at ../sysdeps/unix/syscall-template.S:81 #1 0x000000332a8767a3 in _IO_new_file_write (f=0x1494f00, data=0x7fcfd130b000, n=8) at fileops.c:1294 #2 0x000000332a876682 in new_do_write (fp=0x1494f00, data=0x7fcfd130b000 "", to_do=8) at 
fileops.c:538 #3 0x000000332a877de5 in _IO_new_do_write (fp=<optimized out>, data=<optimized out>, to_do=8) at fileops.c:511 #4 0x000000332a879747 in _IO_flush_all_lockp (do_lock=do_lock@entry=0) at genops.c:845 #5 0x000000332a879898 in _IO_cleanup () at genops.c:1006 #6 0x000000332a838ef0 in __run_exit_handlers (status=status@entry=0, listp=<optimized out>, run_list_atexit=run_list_atexit@entry=true) at exit.c:90 #7 0x000000332a838f25 in __GI_exit (status=status@entry=0) at exit.c:99 #8 0x00000000004070f7 in cleanup_and_exit (signum=<optimized out>) at glusterfsd.c:1119 #9 0x00000000004071d5 in glusterfs_sigwaiter (arg=<optimized out>) at glusterfsd.c:1645 #10 0x000000332b007d15 in start_thread (arg=0x7fcfcfd9a700) at pthread_create.c:308 #11 0x000000332a8f253d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:114 Version-Release number of selected component (if applicable): How reproducible: Once or twice in every 10 times Steps to Reproduce: 1.for i in {1..20}; do echo "#######TEST $i######"; prove -r ./tests/basic/volume-locks.t; done Actual results: A core is generated. Expected results: glusterd should not crash Additional info:
Updating as part of the bug triage. I couldn't see any update on this bug. Do you know whether the problem exists in the latest code base?
Given this issue has not been seen any further and the reporter hasn't come back with further updates, I am closing this issue. Please feel free to reopen if the issue persists.
The needinfo request[s] on this closed bug have been removed, as they have remained unresolved for 1000 days.