| Summary: | glusterd hung | ||
|---|---|---|---|
| Product: | [Community] GlusterFS | Reporter: | Raghavendra G <raghavendra> |
| Component: | rdma | Assignee: | Raghavendra G <raghavendra> |
| Status: | CLOSED CURRENTRELEASE | QA Contact: | |
| Severity: | high | Docs Contact: | |
| Priority: | low | ||
| Version: | mainline | CC: | gluster-bugs |
| Target Milestone: | --- | ||
| Target Release: | --- | ||
| Hardware: | All | ||
| OS: | Linux | ||
| Whiteboard: | |||
| Fixed In Version: | Doc Type: | Bug Fix | |
| Doc Text: | Story Points: | --- | |
| Clone Of: | Environment: | ||
| Last Closed: | Type: | --- | |
| Regression: | --- | Mount Type: | --- |
| Documentation: | --- | CRM: | |
| Verified Versions: | Category: | --- | |
| oVirt Team: | --- | RHEL 7.3 requirements from Atomic Host: | |
| Cloudforms Team: | --- | Target Upstream Version: | |
PATCH: http://patches.gluster.com/patch/4642 in master (rpc-transport/rdma: fix memory corruptions caused by rdma.)
(gdb) info thr 5 Thread 0x4288a940 (LWP 17999) 0x00000038b5a0e4e8 in do_sigwait () from /lib64/libpthread.so.0 4 Thread 0x4328b940 (LWP 18000) 0x00000038b5a0d5cb in read () from /lib64/libpthread.so.0 3 Thread 0x43c8c940 (LWP 18001) __rdma_lookup_peer (device=0x13f3c7d8, qp_num=6291531) at rdma.c:2166 * 2 Thread 0x4468d940 (LWP 18002) 0x00000038b529a0b1 in nanosleep () from /lib64/libc.so.6 1 Thread 0x2b36c875e190 (LWP 17998) 0x00000038b5a0d174 in __lll_lock_wait () from /lib64/libpthread.so.0 (gdb) thr 3 [Switching to thread 3 (Thread 0x43c8c940 (LWP 18001))]#0 __rdma_lookup_peer (device=0x13f3c7d8, qp_num=6291531) at rdma.c:2166 2166 return peer; (gdb) bt #0 __rdma_lookup_peer (device=0x13f3c7d8, qp_num=6291531) at rdma.c:2166 #1 <function called from gdb> #2 0x00000038b5a0d174 in __lll_lock_wait () from /lib64/libpthread.so.0 #3 0x00000038b5a08aca in _L_lock_1034 () from /lib64/libpthread.so.0 #4 0x00000038b5a0898c in pthread_mutex_lock () from /lib64/libpthread.so.0 #5 0x00002b36c8329803 in rpc_transport_ref (this=0x13f60fe8) at rpc-transport.c:1075 #6 0x00002aaaaafa2c1a in rdma_recv_completion_proc (data=0x13f41a10) at rdma.c:3297 #7 0x00000038b5a064a7 in start_thread () from /lib64/libpthread.so.0 #8 0x00000038b52d3c2d in clone () from /lib64/libc.so.6 (gdb) thr 1 [Switching to thread 1 (Thread 0x2b36c875e190 (LWP 17998))]#0 0x00000038b5a0d174 in __lll_lock_wait () from /lib64/libpthread.so.0 (gdb) bt #0 0x00000038b5a0d174 in __lll_lock_wait () from /lib64/libpthread.so.0 #1 0x00000038b5a08aca in _L_lock_1034 () from /lib64/libpthread.so.0 #2 0x00000038b5a0898c in pthread_mutex_lock () from /lib64/libpthread.so.0 #3 0x00002aaaaafa0179 in rdma_register_peer (device=0x13f3c7d8, qp_num=6553677, peer=0x2aaab00c8310) at rdma.c:2087 #4 0x00002aaaaafa0782 in rdma_create_qp (this=0x2aaab00c8038) at rdma.c:2261 #5 0x00002aaaaafa6304 in rdma_server_event_handler (fd=12, idx=1, data=0x13f386a8, poll_in=1, poll_out=0, poll_err=0) at rdma.c:4609 #6 0x00002b36c80e9058 in 
event_dispatch_epoll_handler (event_pool=0x13f30328, events=0x13f61268, i=0) at event.c:812 #7 0x00002b36c80e9247 in event_dispatch_epoll (event_pool=0x13f30328) at event.c:876 #8 0x00002b36c80e95a3 in event_dispatch (event_pool=0x13f30328) at event.c:984 #9 0x0000000000405bf1 in main (argc=1, argv=0x7fffa643d518) at glusterfsd.c:1392 (gdb) thr 3 [Switching to thread 3 (Thread 0x43c8c940 (LWP 18001))]#0 __rdma_lookup_peer (device=0x13f3c7d8, qp_num=6291531) at rdma.c:2166 2166 return peer; (gdb) up #1 <function called from gdb> (gdb) #2 0x00000038b5a0d174 in __lll_lock_wait () from /lib64/libpthread.so.0 (gdb) #3 0x00000038b5a08aca in _L_lock_1034 () from /lib64/libpthread.so.0 (gdb) #4 0x00000038b5a0898c in pthread_mutex_lock () from /lib64/libpthread.so.0 (gdb) #5 0x00002b36c8329803 in rpc_transport_ref (this=0x13f60fe8) at rpc-transport.c:1075 1075 pthread_mutex_lock (&this->lock); (gdb) p *this $18 = {ops = 0x13f62130, listener = 0x2aaab00c2e10, private = 0x13f5f358, xl_private = 0xbaadf00d, xl = 0x241, mydata = 0xfbad2498, lock = {__data = {__lock = 2, __count = 10922, __owner = -1378861056, __nusers = 10922, __kind = -1378861056, __spins = 10922, __list = {__prev = 0x2aaaadd04000, __next = 0x2aaaadd04000}}, __size = "\002\000\000\000\252*\000\000\000@Ð\252*\000\000\000@Ð\252*\000\000\000@Ð\252*\000\000\000@Ð\252*\000 0", __align = 46909632806914}, refcount = -1378861056, ctx = 0x2aaaadd04000, options = 0x2aaaadd05000, name = 0x0, dnscache = 0x0, buf = 0x0, init = 0, fini = 0x13f37400, notify = 0xd, notify_data = 0x0, peerinfo = {sockaddr = {ss_family = 0, __ss_align = 334893296, __ss_padding = '\000' <repeats 17 times>, "\021\366\023", '\000' <repeats 28 times>"\377, \377\377\377", '\000' <repeats 20 times>, " \005U\265\070", '\000' <repeats 34 times>}, sockaddr_len = 0, identifier = '\000' <repeats 107 times>}, myinfo = {sockaddr = {ss_family = 0, __ss_align = 0, __ss_padding = '\000' <repeats 111 times>}, sockaddr_len = 0, identifier = '\000' <repeats 60 
times>, "`\000U\265\070\000\000\000A\f\000\000\000\000\000\000\b\000\000\000\030\f\000\000\000\000\000\000\340\245\061\310\066+\000\000\276\272\376\312\001\000\000\000\f\000\000"}, list = { next = 0x100000001, prev = 0x70000000f}}
As can be seen above, the transport object `this` (0x13f60fe8) passed to rpc_transport_ref has been freed (or corrupted): its fields contain garbage, e.g. xl_private = 0xbaadf00d.