| Summary: | [glusterfs-3.3.0qa1]: glusterfs client crashed | ||
|---|---|---|---|
| Product: | [Community] GlusterFS | Reporter: | Raghavendra Bhat <rabhat> |
| Component: | replicate | Assignee: | Pranith Kumar K <pkarampu> |
| Status: | CLOSED CURRENTRELEASE | QA Contact: | |
| Severity: | high | Docs Contact: | |
| Priority: | low | ||
| Version: | pre-release | CC: | gluster-bugs |
| Target Milestone: | --- | ||
| Target Release: | --- | ||
| Hardware: | x86_64 | ||
| OS: | Linux | ||
| Whiteboard: | |||
| Fixed In Version: | Doc Type: | Bug Fix | |
| Doc Text: | Story Points: | --- | |
| Clone Of: | Environment: | ||
| Last Closed: | Type: | --- | |
| Regression: | --- | Mount Type: | --- |
| Documentation: | --- | CRM: | |
| Verified Versions: | Category: | --- | |
| oVirt Team: | --- | RHEL 7.3 requirements from Atomic Host: | |
| Cloudforms Team: | --- | Target Upstream Version: | |
|
Description
Raghavendra Bhat
2011-09-27 04:20:54 UTC
After the tests of bug 765369, tried to remove everything from the mount point by doing rm -rf from both fuse and nfs clients simultaneously. The fuse client crashed with the following backtrace. Core was generated by `/usr/local/sbin/glusterfs --volfile-id=mirror --volfile-server=10.1.11.73 /clie'. Program terminated with signal 6, Aborted. #0 0x00007f270a314a75 in raise () from /lib/libc.so.6 (gdb) bt #0 0x00007f270a314a75 in raise () from /lib/libc.so.6 #1 0x00007f270a3185c0 in abort () from /lib/libc.so.6 #2 0x00007f270a30d941 in __assert_fail () from /lib/libc.so.6 #3 0x00007f2704bb93e0 in afr_inode_set_read_ctx (this=0x7f26edf52ea0, inode=0x7f269bd24d20, read_child=-1, fresh_children=0x12b8180) at ../../../../../xlators/cluster/afr/src/afr-common.c:417 #4 0x00007f2704ba87f1 in afr_sh_entry_fix (frame=0x7f270955a37c, this=0x7f26edf52ea0) at ../../../../../xlators/cluster/afr/src/afr-self-heal-entry.c:2259 #5 0x00007f2704ba88c1 in afr_sh_entry_lookup_cbk (frame=0x7f270955a37c, cookie=0x1, this=0x7f26edf52ea0, op_ret=-1, op_errno=2, inode=0x7f269bd24d20, buf=0x7fffb9b6ca10, xattr=0x0, postparent=0x7fffb9b6c9a0) at ../../../../../xlators/cluster/afr/src/afr-self-heal-entry.c:2288 #6 0x00007f2704dfc01e in client3_1_lookup_cbk (req=0x7f269c2fc060, iov=0x7f269c2fc0a0, count=1, myframe=0x7f27097d8778) at ../../../../../xlators/protocol/client/src/client3_1-fops.c:2255 #7 0x00007f270acb130d in rpc_clnt_handle_reply (clnt=0x7f26edf5fc10, pollin=0x15c3da0) at ../../../../rpc/rpc-lib/src/rpc-clnt.c:789 #8 0x00007f270acb165a in rpc_clnt_notify (trans=0x7f26edf5ff40, mydata=0x7f26edf5fc40, event=RPC_TRANSPORT_MSG_RECEIVED, data=0x15c3da0) at ../../../../rpc/rpc-lib/src/rpc-clnt.c:902 #9 0x00007f270acad6b8 in rpc_transport_notify (this=0x7f26edf5ff40, event=RPC_TRANSPORT_MSG_RECEIVED, data=0x15c3da0) at ../../../../rpc/rpc-lib/src/rpc-transport.c:498 #10 0x00007f2707e327d0 in socket_event_poll_in (this=0x7f26edf5ff40) at 
../../../../../rpc/rpc-transport/socket/src/socket.c:1675 #11 0x00007f2707e32d54 in socket_event_handler (fd=340, idx=221, data=0x7f26edf5ff40, poll_in=1, poll_out=0, poll_err=0) at ../../../../../rpc/rpc-transport/socket/src/socket.c:1790 #12 0x00007f270af072fa in event_dispatch_epoll_handler (event_pool=0xf1f7c0, events=0xf24d70, i=0) at ../../../libglusterfs/src/event.c:794 #13 0x00007f270af0751d in event_dispatch_epoll (event_pool=0xf1f7c0) at ../../../libglusterfs/src/event.c:856 #14 0x00007f270af078a8 in event_dispatch (event_pool=0xf1f7c0) at ../../../libglusterfs/src/event.c:956 #15 0x000000000040837c in main (argc=4, argv=0x7fffb9b6cfc8) at ../../../glusterfsd/src/glusterfsd.c:1592 (gdb) f 3 #3 0x00007f2704bb93e0 in afr_inode_set_read_ctx (this=0x7f26edf52ea0, inode=0x7f269bd24d20, read_child=-1, fresh_children=0x12b8180) at ../../../../../xlators/cluster/afr/src/afr-common.c:417 417 GF_ASSERT (read_child >= 0); (gdb) p read_child $1 = -1 (gdb) f 4 #4 0x00007f2704ba87f1 in afr_sh_entry_fix (frame=0x7f270955a37c, this=0x7f26edf52ea0) at ../../../../../xlators/cluster/afr/src/afr-self-heal-entry.c:2259 2259 afr_inode_set_read_ctx (this, sh->inode, sh->source, (gdb) p sh->source $2 = -1 (gdb) l 2254 sh->source = source; 2255 2256 afr_reset_children (sh->fresh_children, priv->child_count); 2257 afr_get_fresh_children (sh->success_children, sh->sources, 2258 sh->fresh_children, priv->child_count); 2259 afr_inode_set_read_ctx (this, sh->inode, sh->source, 2260 sh->fresh_children); 2261 2262 2263 heal: (gdb) source source command requires file name of file to source. 
(gdb) p source $3 = -1 (gdb) l afr_sh_entry_fix 2217 } 2218 2219 2220 int 2221 afr_sh_entry_fix (call_frame_t *frame, xlator_t *this) 2222 { 2223 afr_local_t *local = NULL; 2224 afr_self_heal_t *sh = NULL; 2225 afr_private_t *priv = NULL; 2226 int source = 0; (gdb) 2227 2228 int nsources = 0; 2229 2230 local = frame->local; 2231 sh = &local->self_heal; 2232 priv = this->private; 2233 2234 if (sh->forced_merge) { 2235 sh->source = -1; 2236 goto heal; (gdb) 2237 } 2238 2239 nsources = afr_build_sources (this, sh->xattr, sh->buf, 2240 sh->pending_matrix, sh->sources, 2241 sh->success_children, 2242 AFR_ENTRY_TRANSACTION); 2243 if (nsources == 0) { 2244 gf_log (this->name, GF_LOG_TRACE, 2245 "No self-heal needed for %s", 2246 local->loc.path); (gdb) 2247 2248 afr_sh_entry_finish (frame, this); 2249 return 0; 2250 } 2251 2252 source = afr_sh_select_source (sh->sources, priv->child_count); 2253 2254 sh->source = source; l afr_build_sources 607 608 int 609 afr_build_sources (xlator_t *this, dict_t **xattr, struct iatt *bufs, 610 int32_t **pending_matrix, int32_t *sources, 611 int32_t *success_children, afr_transaction_type type) 612 { 613 afr_private_t *priv = NULL; 614 afr_self_heal_type sh_type = AFR_SELF_HEAL_INVALID; 615 int nsources = -1; 616 (gdb) 617 priv = this->private; 618 619 if (afr_get_children_count (success_children, priv->child_count) == 0) 620 goto out; 621 622 afr_build_pending_matrix (priv->pending_key, pending_matrix, 623 xattr, type, priv->child_count); 624 625 sh_type = afr_self_heal_type_for_transaction (type); 626 if (AFR_SELF_HEAL_INVALID == sh_type) (gdb) 627 goto out; 628 629 afr_sh_print_pending_matrix (pending_matrix, this); 630 631 nsources = afr_mark_sources (sources, pending_matrix, bufs, 632 priv->child_count, sh_type, 633 success_children, this->name); 634 out: 635 return nsources; 636 } Here nsources can be -1 if it cannot identify the sources properly. 
However, after calling afr_build_sources we check nsources only for 0, so instead of returning we continue with read_child set to -1. The crash is observed because the entry self-heal goes ahead with the self-heal even when the lookups have failed. This has already been fixed as part of the 3557 patch. Marking it as resolved/fixed. CHANGE: http://review.gluster.com/556 (Change-Id: Ifdf0db71594ce526ad85c21103726798d9aceef4) merged in master by Vijay Bellur (vijay) |