The client mount hung while creating plain files and directories and running a Linux untar on a disperse volume. No bricks were brought down during I/O. Below is the gdb backtrace of the process. Backtrace: ========== (gdb) thread apply all bt Thread 8 (Thread 0x7f3e6dd5c700 (LWP 9691)): #0 0x00000032aa80efbd in nanosleep () at ../sysdeps/unix/syscall-template.S:82 #1 0x00000030770454da in gf_timer_proc (ctx=0x1d08010) at timer.c:195 #2 0x00000032aa807a51 in start_thread (arg=0x7f3e6dd5c700) at pthread_create.c:301 #3 0x00000032aa4e896d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:115 Thread 7 (Thread 0x7f3e6d35b700 (LWP 9692)): #0 __lll_lock_wait_private () at ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:97 #1 0x00000032aa47cd96 in _L_lock_2632 () at hooks.c:129 #2 0x00000032aa477105 in __libc_mallinfo () at malloc.c:4254 #3 0x000000307705abc9 in gf_proc_dump_mem_info () at statedump.c:302 #4 0x000000307705bac2 in gf_proc_dump_info (signum=<value optimized out>, ctx=0x1d08010) at statedump.c:818 #5 0x0000000000405df1 in glusterfs_sigwaiter (arg=<value optimized out>) at glusterfsd.c:1996 #6 0x00000032aa807a51 in start_thread (arg=0x7f3e6d35b700) at pthread_create.c:301 #7 0x00000032aa4e896d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:115 Thread 6 (Thread 0x7f3e6b11d700 (LWP 9695)): #0 __lll_lock_wait_private () at ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:97 #1 0x00000032aa47d29f in _L_lock_9730 () at hooks.c:129 #2 0x00000032aa47a88b in __libc_calloc (n=<value optimized out>, elem_size=<value optimized out>) at malloc.c:4094 #3 0x0000003077065a7e in __gf_default_calloc (size=2097152, cnt=1) at mem-pool.h:118 #4 0x0000003077066067 in synctask_create (env=0x1d35db0, fn=0x7f3e6a2c4bc0 <ec_synctask_heal_wrap>, cbk=0x7f3e6a2bc0c0 <ec_heal_done>, frame=<value optimized out>, opaque=0x7f3e594fed94) at syncop.c:497 #5 0x00000030770692b9 in synctask_new (env=<value optimized out>, fn=<value optimized out>, cbk=0x7f3e6a2bc0c0 <ec_heal_done>, 
frame=<value optimized out>, opaque=<value optimized out>) at syncop.c:566 #6 0x00007f3e6a2bc375 in ec_heal (frame=0x0, this=0x7f3e640265c0, target=18446744073709551615, minimum=-1, func=0x7f3e6a28b010 <ec_heal_report>, data=<value optimized out>, loc=0x7f3e435815b8, partial=0, xdata=0x0) at ec-heal.c:3707 #7 0x00007f3e6a28b27c in ec_check_status (fop=0x7f3e594e6f5c) at ec-common.c:167 #8 0x00007f3e6a2a699c in ec_combine (newcbk=0x7f3e590e2964, combine=<value optimized out>) at ec-combine.c:931 #9 0x00007f3e6a2a46d5 in ec_inode_write_cbk (frame=<value optimized out>, this=0x7f3e640265c0, cookie=<value optimized out>, op_ret=512, op_errno=<value optimized out>, prestat=0x7f3e6b11cb10, poststat=0x7f3e6b11caa0, xdata=0x7f3e73dd3460) at ec-inode-write.c:60 #10 0x00007f3e6a508a3c in client3_3_writev_cbk (req=<value optimized out>, iov=<value optimized out>, count=<value optimized out>, myframe=0x7f3e743ebe58) at client-rpc-fops.c:860 #11 0x000000307740ed75 in rpc_clnt_handle_reply (clnt=0x7f3e6452f7f0, pollin=0x7f3e435f4de0) at rpc-clnt.c:766 #12 0x0000003077410212 in rpc_clnt_notify (trans=<value optimized out>, mydata=0x7f3e6452f820, event=<value optimized out>, data=<value optimized out>) at rpc-clnt.c:894 #13 0x000000307740b8e8 in rpc_transport_notify (this=<value optimized out>, ---Type <return> to continue, or q <return> to quit--- event=<value optimized out>, data=<value optimized out>) at rpc-transport.c:543 #14 0x00007f3e6b34dbcd in socket_event_poll_in (this=0x7f3e6453f460) at socket.c:2290 #15 0x00007f3e6b34f6fd in socket_event_handler (fd=<value optimized out>, idx=<value optimized out>, data=0x7f3e6453f460, poll_in=1, poll_out=0, poll_err=0) at socket.c:2403 #16 0x0000003077080f70 in event_dispatch_epoll_handler (data=0x1d70680) at event-epoll.c:572 #17 event_dispatch_epoll_worker (data=0x1d70680) at event-epoll.c:674 #18 0x00000032aa807a51 in start_thread (arg=0x7f3e6b11d700) at pthread_create.c:301 #19 0x00000032aa4e896d in clone () at 
../sysdeps/unix/sysv/linux/x86_64/clone.S:115 Thread 5 (Thread 0x7f3e60acd700 (LWP 9720)): #0 0x00000032aa4df143 in __poll (fds=<value optimized out>, nfds=<value optimized out>, timeout=<value optimized out>) at ../sysdeps/unix/sysv/linux/poll.c:87 #1 0x00000032aa516010 in svc_run () at svc_run.c:84 #2 0x00007f3e697b2e54 in nsm_thread (argv=<value optimized out>) at nlmcbk_svc.c:121 #3 0x00000032aa807a51 in start_thread (arg=0x7f3e60acd700) at pthread_create.c:301 #4 0x00000032aa4e896d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:115 Thread 4 (Thread 0x7f3e5bfff700 (LWP 9721)): #0 0x00000032aa4e8f63 in epoll_wait () at ../sysdeps/unix/syscall-template.S:82 #1 0x0000003077080dd9 in event_dispatch_epoll_worker (data=0x7f3e640c4cc0) at event-epoll.c:664 #2 0x00000032aa807a51 in start_thread (arg=0x7f3e5bfff700) at pthread_create.c:301 #3 0x00000032aa4e896d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:115 Thread 3 (Thread 0x7f3e5365c700 (LWP 9772)): #0 __lll_lock_wait_private () at ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:97 #1 0x00000032aa47cf7e in _L_lock_5746 () at hooks.c:129 #2 0x00000032aa478a8b in _int_free (av=0x32aa78fe80, p=0x1d71760, have_lock=0) at malloc.c:4967 #3 0x00000030770690d2 in synctask_destroy (task=0x7f3e43605900) at syncop.c:391 #4 0x00000030770695a0 in syncenv_processor (thdata=0x1d36530) at syncop.c:687 #5 0x00000032aa807a51 in start_thread (arg=0x7f3e5365c700) at pthread_create.c:301 #6 0x00000032aa4e896d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:115 Thread 2 (Thread 0x7f3e2bfff700 (LWP 10950)): #0 pthread_cond_timedwait@@GLIBC_2.3.2 () at ../nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S:239 #1 0x00000030770650db in syncenv_task (proc=0x1d36cb0) at syncop.c:591 #2 0x00000030770695b0 in syncenv_processor (thdata=0x1d36cb0) at syncop.c:683 #3 0x00000032aa807a51 in start_thread (arg=0x7f3e2bfff700) at pthread_create.c:301 #4 0x00000032aa4e896d in clone () at 
../sysdeps/unix/sysv/linux/x86_64/clone.S:115 Thread 1 (Thread 0x7f3e750e4740 (LWP 9690)): ---Type <return> to continue, or q <return> to quit--- #0 0x00000032aa8082ad in pthread_join (threadid=139906061031168, thread_return=0x0) at pthread_join.c:89 #1 0x0000003077080a6d in event_dispatch_epoll (event_pool=0x1d26c90) at event-epoll.c:759 #2 0x0000000000407ad4 in main (argc=11, argv=0x7fff254647f8) at glusterfsd.c:2326 (gdb) (gdb) (gdb) (gdb)
This has been identified as a glibc bug in malloc, so the particular version of the kernel that Bhaskar tested with has the bug. Closing this as not a bug in Gluster.