Setup: a 2x2 distributed-replicate volume. I killed one of the bricks (the 1st brick) and then issued replace-brick for the 4th brick. The replace-brick operation started and ran for 2 minutes, after which the source brick crashed. There were 2 FUSE mount points: one was running rdd and the other was running the sanity script (which includes creating 100000 small files). This is the backtrace. Core was generated by `/opt/glusterfs/3.2.5qa6/sbin/glusterfsd --xlator-option mirror-server.listen-po'. Program terminated with signal 11, Segmentation fault. #0 0x00007fe76381879b in default_flush (frame=0x7fe734587ae0, this=0x1c6d190, fd=0x7fe759c7dee4) at defaults.c:876 876 STACK_WIND (frame, default_flush_cbk, FIRST_CHILD(this), Missing separate debuginfos, use: debuginfo-install glibc-2.12-1.25.el6_1.3.x86_64 libgcc-4.4.5-6.el6.x86_64 (gdb) bt #0 0x00007fe76381879b in default_flush (frame=0x7fe734587ae0, this=0x1c6d190, fd=0x7fe759c7dee4) at defaults.c:876 #1 0x00007fe76040e22d in pl_flush (frame=0x7fe734587a40, this=0x1c6e3e0, fd=0x7fe759c7dee4) at posix.c:449 #2 0x00007fe7601fa0c7 in iot_flush_wrapper (frame=<value optimized out>, this=0x1c6f2b0, fd=0x7fe759c7dee4) at io-threads.c:873 #3 0x00007fe763827794 in call_resume_wind (stub=0x7fe7625cc52c) at call-stub.c:2242 #4 call_resume (stub=0x7fe7625cc52c) at call-stub.c:3859 #5 0x00007fe7601ff009 in iot_worker (data=0x1c7ac60) at io-threads.c:129 #6 0x00000037b42077e1 in start_thread () from /lib64/libpthread.so.0 #7 0x00000037b3ae577d in clone () from /lib64/libc.so.6 (gdb) f 0 #0 0x00007fe76381879b in default_flush (frame=0x7fe734587ae0, this=0x1c6d190, fd=0x7fe759c7dee4) at defaults.c:876 876 STACK_WIND (frame, default_flush_cbk, FIRST_CHILD(this), (gdb) l 871 } 872 873 int32_t 874 default_flush (call_frame_t *frame, xlator_t *this, fd_t *fd) 875 { 876 STACK_WIND (frame, default_flush_cbk, FIRST_CHILD(this), 877 FIRST_CHILD(this)->fops->flush, fd); 878 return 0; 879 } 880 (gdb) p *frame $1 = {root = 0x7fe7626a55a4, parent = 0x7fe734587a40, next = 
0x7fe734587a40, prev = 0x1e53a10, local = 0x0, this = 0x1c6d190, ret = 0x7fe76040ae50 <pl_flush_cbk>, ref_count = 0, lock = 1, cookie = 0x7fe734587ae0, complete = _gf_false, op = GF_FOP_NULL, begin = { tv_sec = 0, tv_usec = 0}, end = {tv_sec = 0, tv_usec = 0}, wind_from = 0x7fe760413f59 "pl_flush", wind_to = 0x7fe760413810 "FIRST_CHILD(this)->fops->flush", unwind_from = 0x0, unwind_to = 0x7fe760413d30 "pl_flush_cbk"} (gdb) p *frame->this $2 = {name = 0x1c6be40 "mirror-access-control", type = 0x1c6cfc0 "features/access-control", next = 0x1c6be60, prev = 0x1c6e3e0, parents = 0x1c6e1f0, children = 0x1c6e240, options = 0x1c6c850, dlhandle = 0x1c6dae0, fops = 0x7fe760822b00, cbks = 0x7fe760822d90, dumpops = 0x0, volume_options = {next = 0x1c6e190, prev = 0x1c6e190}, fini = 0x7fe760619770 <fini>, init = 0x7fe76061f840 <init>, reconfigure = 0, mem_acct_init = 0x7fe763815560 <default_mem_acct_init>, validate_options = 0, notify = 0x7fe763815570 <default_notify>, loglevel = GF_LOG_NONE, latencies = {{min = 0, max = 0, total = 0, std = 0, mean = 0, count = 0} <repeats 45 times>}, ctx = 0x1c61010, graph = 0x1c67ad0, itable = 0x0, init_succeeded = 1 '\001', private = 0x1c7ae90, mem_acct = {num_types = 0, rec = 0x0}} (gdb) info thr 13 Thread 0x7fe75b2ee700 (LWP 29714) 0x00000037b420b3cc in pthread_cond_wait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0 12 Thread 0x7fe7595a6700 (LWP 29719) 0x00000037b420b74b in pthread_cond_timedwait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0 11 Thread 0x7fe7594a5700 (LWP 29720) 0x00000037b420b74b in pthread_cond_timedwait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0 10 Thread 0x7fe7591a2700 (LWP 29724) 0x00000037b420b74b in pthread_cond_timedwait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0 9 Thread 0x7fe7592a3700 (LWP 29723) 0x00000037b420b74b in pthread_cond_timedwait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0 8 Thread 0x7fe7633ca700 (LWP 29705) 0x00000037b3ae5d73 in epoll_wait () from /lib64/libc.so.6 7 Thread 0x7fe760148700 (LWP 29715) 
0x00000037b420b74b in pthread_cond_timedwait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0 6 Thread 0x7fe75a8ed700 (LWP 29716) 0x00000037b420b74b in pthread_cond_timedwait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0 5 Thread 0x7fe76143d700 (LWP 29708) 0x00000037b3aaabed in nanosleep () from /lib64/libc.so.6 4 Thread 0x7fe7596a7700 (LWP 29718) 0x00000037b420b74b in pthread_cond_timedwait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0 3 Thread 0x7fe7625ca700 (LWP 29706) 0x00000037b420f235 in sigwait () from /lib64/libpthread.so.0 2 Thread 0x7fe759c63700 (LWP 29717) 0x00000037b420b74b in pthread_cond_timedwait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0 * 1 Thread 0x7fe7593a4700 (LWP 29721) 0x00007fe76381879b in default_flush (frame=0x7fe734587ae0, this=0x1c6d190, fd=0x7fe759c7dee4) at defaults.c:876 (gdb)
CHANGE: http://review.gluster.com/774 (core/stack: perform locked winds) merged in master by Anand Avati (avati)
CHANGE: http://review.gluster.com/2629 (core/stack: perform locked winds) merged in release-3.2 by Anand Avati (avati)
Tested with glusterfs-3.3.0qa40. Ran replace-brick while I/O was in progress on the mount point and some bricks of the volume were down. The source brick did not crash.