Setup: 2 FUSE clients and 2 NFS clients. Run an untar of the Linux kernel tree on one FUSE mount and one NFS mount, while running `rm -rf` in a while loop on the other two mounts; after about 5 minutes the crash occurred. #4 0x00007f652da2283a in dht_subvol_get_hashed (this=0x24721b0, loc=0x7f652b11fe74) at dht-helper.c:369 369 gf_log (this->name, GF_LOG_DEBUG, (gdb) l 364 } 365 366 layout = dht_layout_get (this, loc->parent); 367 368 if (!layout) { 369 gf_log (this->name, GF_LOG_DEBUG, 370 "layout missing path=%s parent=%s", 371 loc->path, uuid_utoa (loc->parent->gfid)); 372 goto out; 373 } (gdb) p loc->parent $1 = (inode_t *) 0x0 (gdb) p *loc $2 = {path = 0x257c7d0 "<gfid:00000000-0000-0000-0000-", '0' <repeats 12 times>, ">", name = 0x0, inode = 0x7f652bd603e4, parent = 0x0, gfid = '\000' <repeats 15 times>, pargfid = '\000' <repeats 15 times>} #0 0x00007f65318e276c in uuid_unpack (in=0x8 <Address 0x8 out of bounds>, uu=0x7fffe0cf0be0) at ../../contrib/uuid/unpack.c:43 ptr = 0x8 <Address 0x8 out of bounds> tmp = 0 #1 0x00007f65318e2162 in uuid_unparse_x (uu=0x8 <Address 0x8 out of bounds>, out=0x2490dd0 "00000000-0000-0000-0000-", '0' <repeats 12 times>, fmt=0x7f65318fec28 "%08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x") at ../../contrib/uuid/unparse.c:55 uuid = {time_low = 0, time_mid = 0, time_hi_and_version = 0, clock_seq = 0, node = "\000\000\000\000\000"} #2 0x00007f65318e2283 in uuid_unparse (uu=0x8 <Address 0x8 out of bounds>, out=0x2490dd0 "00000000-0000-0000-0000-", '0' <repeats 12 times>) at ../../contrib/uuid/unparse.c:75 No locals. 
#3 0x00007f65318b6932 in uuid_utoa (uuid=0x8 <Address 0x8 out of bounds>) at common-utils.c:1750 uuid_buffer = 0x2490dd0 "00000000-0000-0000-0000-", '0' <repeats 12 times> #4 0x00007f652da2283a in dht_subvol_get_hashed (this=0x24721b0, loc=0x7f652b11fe74) at dht-helper.c:369 layout = 0x0 subvol = 0x0 __FUNCTION__ = "dht_subvol_get_hashed" #5 0x00007f652da3aec6 in dht_lookup (frame=0x7f65304c7520, this=0x24721b0, loc=0x7f652b11fe74, xattr_req=0x257d660) at dht-common.c:1337 subvol = 0x0 hashed_subvol = 0x0 local = 0x257ae60 conf = 0x2484fd0 ret = 0 op_errno = -1 layout = 0x0 i = 0 call_cnt = 0 new_loc = {path = 0x0, name = 0x0, inode = 0x0, parent = 0x0, gfid = '\000' <repeats 15 times>, pargfid = '\000' <repeats 15 times>} __FUNCTION__ = "dht_lookup" #6 0x00007f65318b2438 in default_lookup (frame=0x7f65304c731c, this=0x2473490, loc=0x7f652b11fe74, xattr_req=0x257d660) at defaults.c:1183 _new = 0x7f65304c7520 old_THIS = 0x2473490 tmp_cbk = 0x7f65318a2e00 <default_lookup_cbk> __FUNCTION__ = "default_lookup" #7 0x00007f65318b2438 in default_lookup (frame=0x7f65304c8494, this=0x24747a0, loc=0x7f652b11fe74, xattr_req=0x257d660) at defaults.c:1183 _new = 0x7f65304c731c old_THIS = 0x24747a0 tmp_cbk = 0x7f65318a2e00 <default_lookup_cbk> __FUNCTION__ = "default_lookup" #8 0x00007f652d3e7bf8 in ioc_lookup (frame=0x7f65304c75cc, this=0x24759d0, loc=0x7f652b11fe74, xattr_req=0x257d660) at io-cache.c:295 _new = 0x7f65304c8494 old_THIS = 0x24759d0 tmp_cbk = 0x7f652d3e7359 <ioc_lookup_cbk> local = 0x254f560 op_errno = -1 ret = 0 __FUNCTION__ = "ioc_lookup"
This happens when an rm and a create run in parallel. The lookup succeeds on the directory and a create proceeds, while on another mount the rmdir succeeds. This leaves loc->parent = NULL, which dht_subvol_get_hashed then dereferences via uuid_utoa (loc->parent->gfid). The fix is to handle the NULL parent gracefully.
CHANGE: http://review.gluster.com/2757 (cluster/dht: Handle get cached/hashed subvol failures gracefully) merged into master by Vijay Bellur (vijay)
Verified on 3.3.0 qa45