Bug 786060 - [c3aa99d907591f72b6302287b9b8899514fb52f1]: client & shd crash when one export dir is deleted & created back
Summary: [c3aa99d907591f72b6302287b9b8899514fb52f1]: client & shd crash when one export dir is deleted & created back
Keywords:
Status: CLOSED CURRENTRELEASE
Alias: None
Product: GlusterFS
Classification: Community
Component: replicate
Version: pre-release
Hardware: Unspecified
OS: Unspecified
Priority: urgent
Severity: high
Target Milestone: ---
Assignee: Pranith Kumar K
QA Contact:
URL:
Whiteboard:
Duplicates: 795321 (view as bug list)
Depends On:
Blocks: 817967
TreeView+ depends on / blocked
 
Reported: 2012-01-31 10:27 UTC by Rahul C S
Modified: 2013-07-24 17:36 UTC (History)
4 users (show)

Fixed In Version: glusterfs-3.4.0
Doc Type: Bug Fix
Doc Text:
Clone Of:
Environment:
Last Closed: 2013-07-24 17:36:17 UTC
Regression: ---
Mount Type: ---
Documentation: ---
CRM:
Verified Versions: 1f3a0dd4742a2fcd3215aee4a5e22125d7ea4f4d
Embargoed:


Attachments (Terms of Use)

Description Rahul C S 2012-01-31 10:27:03 UTC
Description of problem:
Created a striped-replicate volume (2x2). I was running a few tests from 2 mount points while also running the `gluster volume heal` command.

Then I deleted an export directory and recreated it.

The clients and the self-heal daemon crashed due to an assertion failure.

Core backtrace:
Core was generated by `/usr/local/sbin/glusterfs --acl --entry-timeout=0 --volfile-id=vol --volfile-se'.
Program terminated with signal 6, Aborted.
#0  0x00007f16bc1cf3a5 in __GI_raise (sig=6) at ../nptl/sysdeps/unix/sysv/linux/raise.c:64
64	../nptl/sysdeps/unix/sysv/linux/raise.c: No such file or directory.
	in ../nptl/sysdeps/unix/sysv/linux/raise.c
(gdb) bt
#0  0x00007f16bc1cf3a5 in __GI_raise (sig=6) at ../nptl/sysdeps/unix/sysv/linux/raise.c:64
#1  0x00007f16bc1d2b0b in __GI_abort () at abort.c:92
#2  0x00007f16bc1c7d4d in __GI___assert_fail (assertion=0x7f16b748dedb "local->loc.parent", file=<optimized out>, line=1894, 
    function=<optimized out>) at assert.c:81
#3  0x00007f16b745bda4 in afr_self_heal_parent_entrylk (frame=0x7f16ba19e070, this=0x7f16a43915c0, 
    lock_cbk=0x7f16b745b97d <afr_sh_post_nb_entrylk_conflicting_sh_cbk>)
    at ../../../../../xlators/cluster/afr/src/afr-self-heal-common.c:1894
#4  0x00007f16b745be19 in afr_self_heal_conflicting_entries (frame=0x7f16ba19e070, this=0x7f16a43915c0)
    at ../../../../../xlators/cluster/afr/src/afr-self-heal-common.c:1905
#5  0x00007f16b745c974 in afr_self_heal (frame=0x7f16bb1e837c, this=0x7f16a43915c0, inode=0x7f16bb9c704c)
    at ../../../../../xlators/cluster/afr/src/afr-self-heal-common.c:2146
#6  0x00007f16b747d435 in afr_launch_self_heal (frame=0x7f16bb1e837c, this=0x7f16a43915c0, inode=0x7f16bb9c704c, background=_gf_true, 
    ia_type=IA_IFDIR, reason=0x7f16b74923e0 "lookup detected pending operations", 
    gfid_sh_success_cbk=0x7f16b747dfde <afr_post_gfid_sh_success>, unwind=0x7f16b747dd7a <afr_self_heal_lookup_unwind>)
    at ../../../../../xlators/cluster/afr/src/afr-common.c:1290
#7  0x00007f16b747e39e in afr_lookup_perform_self_heal (frame=0x7f16bb1e837c, this=0x7f16a43915c0, sh_launched=0x7fffa22917a4)
    at ../../../../../xlators/cluster/afr/src/afr-common.c:1583
#8  0x00007f16b747e97e in afr_lookup_done (frame=0x7f16bb1e837c, this=0x7f16a43915c0)
    at ../../../../../xlators/cluster/afr/src/afr-common.c:1733
#9  0x00007f16b747f223 in afr_lookup_cbk (frame=0x7f16bb1e837c, cookie=0x1, this=0x7f16a43915c0, op_ret=0, op_errno=0, 
    inode=0x7f16bb9c704c, buf=0x7fffa22919d0, xattr=0x25876b0, postparent=0x7fffa2291a40)
    at ../../../../../xlators/cluster/afr/src/afr-common.c:1904
#10 0x00007f16b76bf93e in client3_1_lookup_cbk (req=0x7f16b015fa80, iov=0x7f16b015fac0, count=1, myframe=0x7f16bb1e7ac0)
    at ../../../../../xlators/protocol/client/src/client3_1-fops.c:2292
#11 0x00007f16bc97fe02 in rpc_clnt_handle_reply (clnt=0x7f16a4336df0, pollin=0x258bc40) at ../../../../rpc/rpc-lib/src/rpc-clnt.c:790
#12 0x00007f16bc980163 in rpc_clnt_notify (trans=0x7f16a4472ed0, mydata=0x7f16a4336e20, event=RPC_TRANSPORT_MSG_RECEIVED, data=0x258bc40)
    at ../../../../rpc/rpc-lib/src/rpc-clnt.c:909
#13 0x00007f16bc97c02c in rpc_transport_notify (this=0x7f16a4472ed0, event=RPC_TRANSPORT_MSG_RECEIVED, data=0x258bc40)
    at ../../../../rpc/rpc-lib/src/rpc-transport.c:498
#14 0x00007f16b82f72db in socket_event_poll_in (this=0x7f16a4472ed0) at ../../../../../rpc/rpc-transport/socket/src/socket.c:1675
#15 0x00007f16b82f7844 in socket_event_handler (fd=17, idx=6, data=0x7f16a4472ed0, poll_in=1, poll_out=0, poll_err=0)
    at ../../../../../rpc/rpc-transport/socket/src/socket.c:1790
#16 0x00007f16bcbd4ddc in event_dispatch_epoll_handler (event_pool=0x25582d0, events=0x255db50, i=1)
    at ../../../libglusterfs/src/event.c:794
#17 0x00007f16bcbd4fef in event_dispatch_epoll (event_pool=0x25582d0) at ../../../libglusterfs/src/event.c:856
#18 0x00007f16bcbd5362 in event_dispatch (event_pool=0x25582d0) at ../../../libglusterfs/src/event.c:956
---Type <return> to continue, or q <return> to quit---
#19 0x0000000000407ede in main (argc=6, argv=0x7fffa2291fa8) at ../../../glusterfsd/src/glusterfsd.c:1601
(gdb) f 3
#3  0x00007f16b745bda4 in afr_self_heal_parent_entrylk (frame=0x7f16ba19e070, this=0x7f16a43915c0, 
    lock_cbk=0x7f16b745b97d <afr_sh_post_nb_entrylk_conflicting_sh_cbk>)
    at ../../../../../xlators/cluster/afr/src/afr-self-heal-common.c:1894
1894	        GF_ASSERT (local->loc.parent);
(gdb) p local->loc
$1 = {path = 0x270b820 "/", name = 0x270b821 "", inode = 0x7f16bb9c704c, parent = 0x0, gfid = '\000' <repeats 15 times>, "\001", 
  pargfid = '\000' <repeats 15 times>}
(gdb) f 3
#3  0x00007f16b745bda4 in afr_self_heal_parent_entrylk (frame=0x7f16ba19e070, this=0x7f16a43915c0, 
    lock_cbk=0x7f16b745b97d <afr_sh_post_nb_entrylk_conflicting_sh_cbk>)
    at ../../../../../xlators/cluster/afr/src/afr-self-heal-common.c:1894
1894	        GF_ASSERT (local->loc.parent);
(gdb) l afr_self_heal_parent_entrylk
1878	}
1879	
1880	static int
1881	afr_self_heal_parent_entrylk (call_frame_t *frame, xlator_t *this,
1882	                              afr_lock_cbk_t lock_cbk)
1883	{
1884	        afr_local_t         *local    = NULL;
1885	        afr_self_heal_t     *sh       = NULL;
1886	
1887	        local    = frame->local;
(gdb) 
1888	        sh       = &local->self_heal;
1889	
1890	        gf_log (this->name, GF_LOG_TRACE,
1891	                "attempting to recreate missing entries for path=%s",
1892	                local->loc.path);
1893	
1894	        GF_ASSERT (local->loc.parent);
1895	        afr_build_parent_loc (&sh->parent_loc, &local->loc);
1896	
1897	        afr_sh_entrylk (frame, this, &sh->parent_loc, NULL,
(gdb) 
1898	                        lock_cbk);
1899	        return 0;
1900	}
1901	
1902	static int
1903	afr_self_heal_conflicting_entries (call_frame_t *frame, xlator_t *this)
1904	{
1905	        afr_self_heal_parent_entrylk (frame, this,
1906	                                      afr_sh_post_nb_entrylk_conflicting_sh_cbk);
1907	        return 0;
(gdb) p local->loc.path
$2 = 0x270b820 "/"
(gdb) p local->loc
$3 = {path = 0x270b820 "/", name = 0x270b821 "", inode = 0x7f16bb9c704c, parent = 0x0, gfid = '\000' <repeats 15 times>, "\001", 
  pargfid = '\000' <repeats 15 times>}
(gdb) 
$4 = {path = 0x270b820 "/", name = 0x270b821 "", inode = 0x7f16bb9c704c, parent = 0x0, gfid = '\000' <repeats 15 times>, "\001", 
  pargfid = '\000' <repeats 15 times>}
(gdb) p local->loc.inode
$5 = (inode_t *) 0x7f16bb9c704c
(gdb) p *local->loc.inode
$6 = {table = 0x270a710, gfid = '\000' <repeats 15 times>, "\001", lock = 1, nlookup = 0, ref = 69497, ia_type = IA_IFDIR, fd_list = {
    next = 0x7f16bb9c707c, prev = 0x7f16bb9c707c}, dentry_list = {next = 0x7f16bb9c708c, prev = 0x7f16bb9c708c}, hash = {
    next = 0x7f16bb9c709c, prev = 0x7f16bb9c709c}, list = {next = 0x270a770, prev = 0x270a770}, _ctx = 0x269a4e0}

Comment 1 Pranith Kumar K 2012-02-24 08:36:07 UTC
*** Bug 795321 has been marked as a duplicate of this bug. ***

Comment 2 Pranith Kumar K 2012-02-28 04:16:50 UTC
The patch I sent for 787671 also handles this case.

Comment 3 Pranith Kumar K 2012-03-01 17:24:42 UTC
Fixed as part of 787671

Comment 4 Rahul C S 2012-04-05 10:30:56 UTC
No crashes found.


Note You need to log in before you can comment on or make changes to this bug.