Hide Forgot
Submitted patch http://patches.gluster.com/patch/4853/. This prevents the crash, but the root cause still needs to be understood and handled properly. The 'callingfn()' log should help in case of failures anyway. -Amar
PATCH: http://patches.gluster.com/patch/4853 in master (check for 'link_inode' in server protocol)
while running mixed test with 4 dht server + 7 clients , following core found with 3.1.0qa26. ------------- (gdb) bt full #0 0x00002aaaab575152 in pthread_spin_lock () from /lib64/libpthread.so.0 No symbol table info available. #1 0x00002aaaaad04c3f in fd_bind (fd=0x2aaab0f3f64c) at fd.c:493 inode = (inode_t *) 0x0 __FUNCTION__ = "fd_bind" #2 0x00002aaaadc11530 in server_create_cbk (frame=0x2aaaac550514, cookie=0x2aaaac33fda4, this=0x63b778, op_ret=0, op_errno=0, fd=0x2aaab0f3f64c, inode=0x2aaaae0b9b0c, stbuf=0x42d07ad0, preparent=0x42d07a50, postparent=0x42d079e0) at server3_1-fops.c:1384 conn = (server_connection_t *) 0x2aaab40015f8 state = (server_state_t *) 0x2aaab4e140a8 link_inode = (inode_t *) 0x0 req = (rpcsvc_request_t *) 0x0 fd_no = 0 rsp = {gfs_id = 0, op_ret = 0, op_errno = 0, stat = {ia_gfid = '\0' <repeats 15 times>, ia_ino = 0, ia_dev = 0, mode = 0, ia_nlink = 0, ia_uid = 0, ia_gid = 0, ia_rdev = 0, ia_size = 0, ia_blksize = 0, ia_blocks = 0, ia_atime = 0, ia_atime_nsec = 0, ia_mtime = 0, ia_mtime_nsec = 0, ia_ctime = 0, ia_ctime_nsec = 0}, fd = 0, preparent = {ia_gfid = '\0' <repeats 15 times>, ia_ino = 0, ia_dev = 0, mode = 0, ia_nlink = 0, ia_uid = 0, ia_gid = 0, ia_rdev = 0, ia_size = 0, ia_blksize = 0, ia_blocks = 0, ia_atime = 0, ia_atime_nsec = 0, ia_mtime = 0, ia_mtime_nsec = 0, ia_ctime = 0, ia_ctime_nsec = 0}, postparent = { ia_gfid = '\0' <repeats 15 times>, ia_ino = 0, ia_dev = 0, mode = 0, ia_nlink = 0, ia_uid = 0, ia_gid = 0, ia_rdev = 0, ia_size = 0, ia_blksize = 0, ia_blocks = 0, ia_atime = 0, ia_atime_nsec = 0, ia_mtime = 0, ia_mtime_nsec = 0, ia_ctime = 0, ia_ctime_nsec = 0}} __FUNCTION__ = "server_create_cbk" #3 0x00002aaaad9ef21f in iot_create_cbk (frame=0x2aaaac33fda4, cookie=0x2aaaac340770, this=0x63a4d8, op_ret=0, op_errno=0, fd=0x2aaab0f3f64c, inode=0x2aaaae0b9b0c, stbuf=0x42d07ad0, preparent=0x42d07a50, postparent=0x42d079e0) at io-threads.c:736 fn = (fop_create_cbk_t) 0x2aaaadc1138a <server_create_cbk> _parent = 
(call_frame_t *) 0x2aaaac550514 old_THIS = (xlator_t *) 0x63a4d8 __FUNCTION__ = "iot_create_cbk" #4 0x00002aaaaace7179 in default_create_cbk (frame=0x2aaaac340770, cookie=0x2aaaac340e24, this=0x6392c8, op_ret=0, op_errno=0, fd=0x2aaab0f3f64c, inode=0x2aaaae0b9b0c, buf=0x42d07ad0, preparent=0x42d07a50, postparent=0x42d079e0) at defaults.c:461 fn = (ret_fn_t) 0x2aaaad9ef0b2 <iot_create_cbk> _parent = (call_frame_t *) 0x2aaaac33fda4 old_THIS = (xlator_t *) 0x6392c8 __FUNCTION__ = "default_create_cbk" #5 0x00002aaaad330d87 in pl_create_cbk (frame=0x2aaaac340e24, cookie=0x2aaaac33fb94, this=0x636da8, op_ret=0, op_errno=0, fd=0x2aaab0f3f64c, inode=0x2aaaae0b9b0c, buf=0x42d07ad0, preparent=0x42d07a50, postparent=0x42d079e0) at posix.c:460 fn = (fop_create_cbk_t) 0x2aaaaace7002 <default_create_cbk> _parent = (call_frame_t *) 0x2aaaac340770 old_THIS = (xlator_t *) 0x636da8 Missing separate debuginfos, use: debuginfo-install gcc.x86_64 glibc.x86_64 ---Type <return> to continue, or q <return> to quit--- dummy = 1 ret = 0 __FUNCTION__ = "pl_create_cbk" #6 0x00002aaaaace7179 in default_create_cbk (frame=0x2aaaac33fb94, cookie=0x2aaaac341ea4, this=0x635b88, op_ret=0, op_errno=0, fd=0x2aaab0f3f64c, inode=0x2aaaae0b9b0c, buf=0x42d07ad0, preparent=0x42d07a50, postparent=0x42d079e0) at defaults.c:461 fn = (ret_fn_t) 0x2aaaad330b95 <pl_create_cbk> _parent = (call_frame_t *) 0x2aaaac340e24 old_THIS = (xlator_t *) 0x635b88 __FUNCTION__ = "default_create_cbk" #7 0x00002aaaacf0a1fb in posix_create (frame=0x2aaaac341ea4, this=0x6348d8, loc=0x2aaaac77957c, flags=2, mode=1285, fd=0x2aaab0f3f64c, params=0x2aaab471b9a8) at posix.c:2086 fn = (fop_create_cbk_t) 0x2aaaaace7002 <default_create_cbk> _parent = (call_frame_t *) 0x2aaaac33fb94 old_THIS = (xlator_t *) 0x6348d8 op_ret = 0 op_errno = 0 _fd = 296 _flags = 66 real_path = 0x42d07900 "/mnt/dir1//dir1/nfsalpha200/ip-10-243-113-224/test1/fstest_e714a0ea8375c01adf3eb26b4ba4942d" stbuf = {ia_ino = 17548696, ia_gfid = {0 '\0' <repeats 16 
times>}, ia_dev = 2064, ia_type = IA_IFREG, ia_prot = {suid = 0 '\0', sgid = 0 '\0', sticky = 1 '\001', owner = {read = 0 '\0', write = 0 '\0', exec = 0 '\0'}, group = {read = 0 '\0', write = 0 '\0', exec = 0 '\0'}, other = {read = 0 '\0', write = 0 '\0', exec = 0 '\0'}}, ia_nlink = 1, ia_uid = 0, ia_gid = 0, ia_rdev = 0, ia_size = 0, ia_blksize = 4096, ia_blocks = 8, ia_atime = 1284792806, ia_atime_nsec = 0, ia_mtime = 1284792806, ia_mtime_nsec = 0, ia_ctime = 1284792808, ia_ctime_nsec = 0} pfd = (struct posix_fd *) 0x2aaab526c448 priv = (struct posix_private *) 0x645158 was_present = 1 '\001' gid = 0 pathdup = 0x2aaab4de8258 "/mnt/dir1//dir1/nfsalpha200/ip-10-243-113-224/test1" parentpath = 0x2aaab4de8258 "/mnt/dir1//dir1/nfsalpha200/ip-10-243-113-224/test1" preparent = {ia_ino = 17548678, ia_gfid = {107 'k', 4 '\004', 232 '�', 38 '&', 137 '\211', 36 '$', 66 'B', 131 '\203', 171 '�', 157 '\235', 186 '�', 227 '�', 139 '\213', 179 '�', 186 '�', 54 '6'}, ia_dev = 2064, ia_type = IA_IFDIR, ia_prot = {suid = 0 '\0', sgid = 0 '\0', sticky = 0 '\0', owner = {read = 1 '\001', write = 1 '\001', exec = 1 '\001'}, group = {read = 1 '\001', write = 0 '\0', exec = 1 '\001'}, other = {read = 1 '\001', write = 0 '\0', exec = 1 '\001'}}, ia_nlink = 3, ia_uid = 0, ia_gid = 0, ia_rdev = 0, ia_size = 4096, ia_blksize = 4096, ia_blocks = 16, ia_atime = 1284792398, ia_atime_nsec = 0, ia_mtime = 1284792806, ia_mtime_nsec = 0, ia_ctime = 1284792806, ia_ctime_nsec = 0} postparent = {ia_ino = 17548678, ia_gfid = {107 'k', 4 '\004', 232 '�', 38 '&', 137 '\211', 36 '$', 66 'B', 131 '\203', 171 '�', 157 '\235', 186 '�', 227 '�', 139 '\213', 179 '�', 186 '�', 54 '6'}, ia_dev = 2064, ia_type = IA_IFDIR, ia_prot = {suid = 0 '\0', sgid = 0 '\0', sticky = 0 '\0', owner = { ---Type <return> to continue, or q <return> to quit--- read = 1 '\001', write = 1 '\001', exec = 1 '\001'}, group = {read = 1 '\001', write = 0 '\0', exec = 1 '\001'}, other = {read = 1 '\001', write = 0 '\0', exec = 1 
'\001'}}, ia_nlink = 3, ia_uid = 0, ia_gid = 0, ia_rdev = 0, ia_size = 4096, ia_blksize = 4096, ia_blocks = 16, ia_atime = 1284792398, ia_atime_nsec = 0, ia_mtime = 1284792806, ia_mtime_nsec = 0, ia_ctime = 1284792806, ia_ctime_nsec = 0} __FUNCTION__ = "posix_create" #8 0x00002aaaad1232f9 in ac_create_resume (frame=0x2aaaac33fb94, this=0x635b88, loc=0x2aaaac77957c, flags=2, mode=1285, fd=0x2aaab0f3f64c, params=0x2aaab471b9a8) at access-control.c:1164 _new = (call_frame_t *) 0x2aaaac341ea4 old_THIS = (xlator_t *) 0x635b88 tmp_cbk = (fop_create_cbk_t) 0x2aaaaace7002 <default_create_cbk> __FUNCTION__ = "ac_create_resume" #9 0x00002aaaaacfcf83 in call_resume_wind (stub=0x2aaaac779544) at call-stub.c:2127 __FUNCTION__ = "call_resume_wind" #10 0x00002aaaaad03d6b in call_resume (stub=0x2aaaac779544) at call-stub.c:3870 old_THIS = (xlator_t *) 0x635b88 __FUNCTION__ = "call_resume" #11 0x00002aaaad1233bf in ac_create_stat_cbk (frame=0x2aaaac33fb94, cookie=0x2aaaac341034, this=0x635b88, op_ret=0, op_errno=0, buf=0x42d07db0) at access-control.c:1189 stub = (call_stub_t *) 0x2aaaac779544 __FUNCTION__ = "ac_create_stat_cbk" #12 0x00002aaaacf0203e in posix_stat (frame=0x2aaaac341034, this=0x6348d8, loc=0x42d07eb0) at posix.c:518 fn = (fop_stat_cbk_t) 0x2aaaad123319 <ac_create_stat_cbk> _parent = (call_frame_t *) 0x2aaaac33fb94 old_THIS = (xlator_t *) 0x6348d8 buf = {ia_ino = 17548678, ia_gfid = {107 'k', 4 '\004', 232 '�', 38 '&', 137 '\211', 36 '$', 66 'B', 131 '\203', 171 '�', 157 '\235', 186 '�', 227 '�', 139 '\213', 179 '�', 186 '�', 54 '6'}, ia_dev = 2064, ia_type = IA_IFDIR, ia_prot = {suid = 0 '\0', sgid = 0 '\0', sticky = 0 '\0', owner = {read = 1 '\001', write = 1 '\001', exec = 1 '\001'}, group = {read = 1 '\001', write = 0 '\0', exec = 1 '\001'}, other = {read = 1 '\001', write = 0 '\0', exec = 1 '\001'}}, ia_nlink = 3, ia_uid = 0, ia_gid = 0, ia_rdev = 0, ia_size = 4096, ia_blksize = 4096, ia_blocks = 16, ia_atime = 1284792398, ia_atime_nsec = 0, ia_mtime = 
1284792806, ia_mtime_nsec = 0, ia_ctime = 1284792806, ia_ctime_nsec = 0} real_path = 0x42d07d10 "/mnt/dir1//dir1/nfsalpha200/ip-10-243-113-224/test1" op_ret = 0 op_errno = 0 priv = (struct posix_private *) 0x645158 __FUNCTION__ = "posix_stat" #13 0x00002aaaad123884 in ac_create (frame=0x2aaaac33fb94, this=0x635b88, loc=0x2aaaac77c2c4, flags=2, mode=1285, fd=0x2aaab0f3f64c, params=0x2aaab471b9a8) at access-control.c:1224 _new = (call_frame_t *) 0x2aaaac341034 ---Type <return> to continue, or q <return> to quit--- old_THIS = (xlator_t *) 0x635b88 tmp_cbk = (fop_stat_cbk_t) 0x2aaaad123319 <ac_create_stat_cbk> stub = (call_stub_t *) 0x2aaaac779544 ret = 0 parentloc = {path = 0x2aaab522de68 "/dir1/nfsalpha200/ip-10-243-113-224/test1", name = 0x2aaab522de8c "test1", ino = 17548678, inode = 0x2aaaae0e2714, parent = 0x2aaaae0e2678} __FUNCTION__ = "ac_create" #14 0x00002aaaad330ff3 in pl_create (frame=0x2aaaac340e24, this=0x636da8, loc=0x2aaaac77c2c4, flags=2, mode=1285, fd=0x2aaab0f3f64c, params=0x2aaab471b9a8) at posix.c:471 _new = (call_frame_t *) 0x2aaaac33fb94 old_THIS = (xlator_t *) 0x636da8 tmp_cbk = (fop_create_cbk_t) 0x2aaaad330b95 <pl_create_cbk> __FUNCTION__ = "pl_create" #15 0x00002aaaad7d51d6 in pump_create (frame=0x2aaaac340770, this=0x6392c8, loc=0x2aaaac77c2c4, flags=2, mode=1285, fd=0x2aaab0f3f64c, params=0x2aaab471b9a8) at pump.c:1816 _new = (call_frame_t *) 0x2aaaac340e24 old_THIS = (xlator_t *) 0x6392c8 tmp_cbk = (fop_create_cbk_t) 0x2aaaaace7002 <default_create_cbk> priv = (afr_private_t *) 0x640138 __FUNCTION__ = "pump_create" #16 0x00002aaaad9ef48b in iot_create_wrapper (frame=0x2aaaac33fda4, this=0x63a4d8, loc=0x2aaaac77c2c4, flags=2, mode=1285, fd=0x2aaab0f3f64c, params=0x2aaab471b9a8) at io-threads.c:746 _new = (call_frame_t *) 0x2aaaac340770 old_THIS = (xlator_t *) 0x63a4d8 tmp_cbk = (fop_create_cbk_t) 0x2aaaad9ef0b2 <iot_create_cbk> __FUNCTION__ = "iot_create_wrapper" #17 0x00002aaaaacfcf83 in call_resume_wind (stub=0x2aaaac77c28c) at 
call-stub.c:2127 __FUNCTION__ = "call_resume_wind" #18 0x00002aaaaad03d6b in call_resume (stub=0x2aaaac77c28c) at call-stub.c:3870 old_THIS = (xlator_t *) 0x63a4d8 __FUNCTION__ = "call_resume" #19 0x00002aaaad9eae2e in iot_worker (data=0x63f378) at io-threads.c:118 conf = (iot_conf_t *) 0x63f378 this = (xlator_t *) 0x63a4d8 stub = (call_stub_t *) 0x2aaaac77c28c sleep_till = {tv_sec = 1284792928, tv_nsec = 0} ---Type <return> to continue, or q <return> to quit--- ret = 0 timeout = 0 '\0' bye = 0 '\0' __FUNCTION__ = "iot_worker"
Checks have been added, so this crash should no longer happen. However, I want to keep this bug open for some more time, as the root cause is not yet known. Since a workaround is already available and this crash is not seen regularly, I will reduce the severity and work on it after 3.1.0.
Lakshmi, can you try this with a newer release and see if it still happens? IMO it should be fixed now.
Not seen after 3.1.0 GA. Please re-open if it is seen again.