Bug 763367 (GLUSTER-1635) - running mixed tests glusterfs crashed
Summary: running mixed tests glusterfs crashed
Keywords:
Status: CLOSED WORKSFORME
Alias: GLUSTER-1635
Product: GlusterFS
Classification: Community
Component: protocol
Version: 3.1-alpha
Hardware: All
OS: Linux
low
medium
Target Milestone: ---
Assignee: Amar Tumballi
QA Contact:
URL:
Whiteboard:
Depends On:
Blocks:
TreeView+ depends on / blocked
 
Reported: 2010-09-18 07:04 UTC by Lakshmipathi G
Modified: 2015-12-01 16:45 UTC (History)
3 users (show)

Fixed In Version:
Doc Type: Bug Fix
Doc Text:
Clone Of:
Environment:
Last Closed:
Regression: RTP
Mount Type: nfs
Documentation: ---
CRM:
Verified Versions:


Attachments (Terms of Use)

Description Amar Tumballi 2010-09-18 04:32:44 UTC
submitted patch http://patches.gluster.com/patch/4853/ 

This prevents the crash, but the root cause still needs to be understood and handled properly.

The 'callingfn()' log should help in case of failures anyway.

-Amar

Comment 1 Vijay Bellur 2010-09-18 04:51:37 UTC
PATCH: http://patches.gluster.com/patch/4853 in master (check for 'link_inode' in server protocol)

Comment 2 Lakshmipathi G 2010-09-18 07:04:39 UTC
While running the mixed test with 4 DHT servers + 7 clients, the following core was found with 3.1.0qa26.
-------------
(gdb) bt full
#0  0x00002aaaab575152 in pthread_spin_lock () from /lib64/libpthread.so.0
No symbol table info available.
#1  0x00002aaaaad04c3f in fd_bind (fd=0x2aaab0f3f64c) at fd.c:493
	inode = (inode_t *) 0x0
	__FUNCTION__ = "fd_bind"
#2  0x00002aaaadc11530 in server_create_cbk (frame=0x2aaaac550514, cookie=0x2aaaac33fda4, this=0x63b778, op_ret=0, op_errno=0, fd=0x2aaab0f3f64c, 
    inode=0x2aaaae0b9b0c, stbuf=0x42d07ad0, preparent=0x42d07a50, postparent=0x42d079e0) at server3_1-fops.c:1384
	conn = (server_connection_t *) 0x2aaab40015f8
	state = (server_state_t *) 0x2aaab4e140a8
	link_inode = (inode_t *) 0x0
	req = (rpcsvc_request_t *) 0x0
	fd_no = 0
	rsp = {gfs_id = 0, op_ret = 0, op_errno = 0, stat = {ia_gfid = '\0' <repeats 15 times>, ia_ino = 0, ia_dev = 0, mode = 0, ia_nlink = 0, ia_uid = 0, 
    ia_gid = 0, ia_rdev = 0, ia_size = 0, ia_blksize = 0, ia_blocks = 0, ia_atime = 0, ia_atime_nsec = 0, ia_mtime = 0, ia_mtime_nsec = 0, ia_ctime = 0, 
    ia_ctime_nsec = 0}, fd = 0, preparent = {ia_gfid = '\0' <repeats 15 times>, ia_ino = 0, ia_dev = 0, mode = 0, ia_nlink = 0, ia_uid = 0, ia_gid = 0, ia_rdev = 0, 
    ia_size = 0, ia_blksize = 0, ia_blocks = 0, ia_atime = 0, ia_atime_nsec = 0, ia_mtime = 0, ia_mtime_nsec = 0, ia_ctime = 0, ia_ctime_nsec = 0}, postparent = {
    ia_gfid = '\0' <repeats 15 times>, ia_ino = 0, ia_dev = 0, mode = 0, ia_nlink = 0, ia_uid = 0, ia_gid = 0, ia_rdev = 0, ia_size = 0, ia_blksize = 0, 
    ia_blocks = 0, ia_atime = 0, ia_atime_nsec = 0, ia_mtime = 0, ia_mtime_nsec = 0, ia_ctime = 0, ia_ctime_nsec = 0}}
	__FUNCTION__ = "server_create_cbk"
#3  0x00002aaaad9ef21f in iot_create_cbk (frame=0x2aaaac33fda4, cookie=0x2aaaac340770, this=0x63a4d8, op_ret=0, op_errno=0, fd=0x2aaab0f3f64c, inode=0x2aaaae0b9b0c, 
    stbuf=0x42d07ad0, preparent=0x42d07a50, postparent=0x42d079e0) at io-threads.c:736
	fn = (fop_create_cbk_t) 0x2aaaadc1138a <server_create_cbk>
	_parent = (call_frame_t *) 0x2aaaac550514
	old_THIS = (xlator_t *) 0x63a4d8
	__FUNCTION__ = "iot_create_cbk"
#4  0x00002aaaaace7179 in default_create_cbk (frame=0x2aaaac340770, cookie=0x2aaaac340e24, this=0x6392c8, op_ret=0, op_errno=0, fd=0x2aaab0f3f64c, 
    inode=0x2aaaae0b9b0c, buf=0x42d07ad0, preparent=0x42d07a50, postparent=0x42d079e0) at defaults.c:461
	fn = (ret_fn_t) 0x2aaaad9ef0b2 <iot_create_cbk>
	_parent = (call_frame_t *) 0x2aaaac33fda4
	old_THIS = (xlator_t *) 0x6392c8
	__FUNCTION__ = "default_create_cbk"
#5  0x00002aaaad330d87 in pl_create_cbk (frame=0x2aaaac340e24, cookie=0x2aaaac33fb94, this=0x636da8, op_ret=0, op_errno=0, fd=0x2aaab0f3f64c, inode=0x2aaaae0b9b0c, 
    buf=0x42d07ad0, preparent=0x42d07a50, postparent=0x42d079e0) at posix.c:460
	fn = (fop_create_cbk_t) 0x2aaaaace7002 <default_create_cbk>
	_parent = (call_frame_t *) 0x2aaaac340770
	old_THIS = (xlator_t *) 0x636da8
Missing separate debuginfos, use: debuginfo-install gcc.x86_64 glibc.x86_64
---Type <return> to continue, or q <return> to quit--- 
	dummy = 1
	ret = 0
	__FUNCTION__ = "pl_create_cbk"
#6  0x00002aaaaace7179 in default_create_cbk (frame=0x2aaaac33fb94, cookie=0x2aaaac341ea4, this=0x635b88, op_ret=0, op_errno=0, fd=0x2aaab0f3f64c, 
    inode=0x2aaaae0b9b0c, buf=0x42d07ad0, preparent=0x42d07a50, postparent=0x42d079e0) at defaults.c:461
	fn = (ret_fn_t) 0x2aaaad330b95 <pl_create_cbk>
	_parent = (call_frame_t *) 0x2aaaac340e24
	old_THIS = (xlator_t *) 0x635b88
	__FUNCTION__ = "default_create_cbk"
#7  0x00002aaaacf0a1fb in posix_create (frame=0x2aaaac341ea4, this=0x6348d8, loc=0x2aaaac77957c, flags=2, mode=1285, fd=0x2aaab0f3f64c, params=0x2aaab471b9a8)
    at posix.c:2086
	fn = (fop_create_cbk_t) 0x2aaaaace7002 <default_create_cbk>
	_parent = (call_frame_t *) 0x2aaaac33fb94
	old_THIS = (xlator_t *) 0x6348d8
	op_ret = 0
	op_errno = 0
	_fd = 296
	_flags = 66
	real_path = 0x42d07900 "/mnt/dir1//dir1/nfsalpha200/ip-10-243-113-224/test1/fstest_e714a0ea8375c01adf3eb26b4ba4942d"
	stbuf = {ia_ino = 17548696, ia_gfid = {0 '\0' <repeats 16 times>}, ia_dev = 2064, ia_type = IA_IFREG, ia_prot = {suid = 0 '\0', sgid = 0 '\0', 
    sticky = 1 '\001', owner = {read = 0 '\0', write = 0 '\0', exec = 0 '\0'}, group = {read = 0 '\0', write = 0 '\0', exec = 0 '\0'}, other = {read = 0 '\0', 
      write = 0 '\0', exec = 0 '\0'}}, ia_nlink = 1, ia_uid = 0, ia_gid = 0, ia_rdev = 0, ia_size = 0, ia_blksize = 4096, ia_blocks = 8, ia_atime = 1284792806, 
  ia_atime_nsec = 0, ia_mtime = 1284792806, ia_mtime_nsec = 0, ia_ctime = 1284792808, ia_ctime_nsec = 0}
	pfd = (struct posix_fd *) 0x2aaab526c448
	priv = (struct posix_private *) 0x645158
	was_present = 1 '\001'
	gid = 0
	pathdup = 0x2aaab4de8258 "/mnt/dir1//dir1/nfsalpha200/ip-10-243-113-224/test1"
	parentpath = 0x2aaab4de8258 "/mnt/dir1//dir1/nfsalpha200/ip-10-243-113-224/test1"
	preparent = {ia_ino = 17548678, ia_gfid = {107 'k', 4 '\004', 232 '\350', 38 '&', 137 '\211', 36 '$', 66 'B', 131 '\203', 171 '\253', 157 '\235', 186 '\272', 227 '\343', 
    139 '\213', 179 '\263', 186 '\272', 54 '6'}, ia_dev = 2064, ia_type = IA_IFDIR, ia_prot = {suid = 0 '\0', sgid = 0 '\0', sticky = 0 '\0', owner = {read = 1 '\001', 
      write = 1 '\001', exec = 1 '\001'}, group = {read = 1 '\001', write = 0 '\0', exec = 1 '\001'}, other = {read = 1 '\001', write = 0 '\0', exec = 1 '\001'}}, 
  ia_nlink = 3, ia_uid = 0, ia_gid = 0, ia_rdev = 0, ia_size = 4096, ia_blksize = 4096, ia_blocks = 16, ia_atime = 1284792398, ia_atime_nsec = 0, 
  ia_mtime = 1284792806, ia_mtime_nsec = 0, ia_ctime = 1284792806, ia_ctime_nsec = 0}
	postparent = {ia_ino = 17548678, ia_gfid = {107 'k', 4 '\004', 232 '\350', 38 '&', 137 '\211', 36 '$', 66 'B', 131 '\203', 171 '\253', 157 '\235', 186 '\272', 
    227 '\343', 139 '\213', 179 '\263', 186 '\272', 54 '6'}, ia_dev = 2064, ia_type = IA_IFDIR, ia_prot = {suid = 0 '\0', sgid = 0 '\0', sticky = 0 '\0', owner = {
---Type <return> to continue, or q <return> to quit---
      read = 1 '\001', write = 1 '\001', exec = 1 '\001'}, group = {read = 1 '\001', write = 0 '\0', exec = 1 '\001'}, other = {read = 1 '\001', write = 0 '\0', 
      exec = 1 '\001'}}, ia_nlink = 3, ia_uid = 0, ia_gid = 0, ia_rdev = 0, ia_size = 4096, ia_blksize = 4096, ia_blocks = 16, ia_atime = 1284792398, 
  ia_atime_nsec = 0, ia_mtime = 1284792806, ia_mtime_nsec = 0, ia_ctime = 1284792806, ia_ctime_nsec = 0}
	__FUNCTION__ = "posix_create"
#8  0x00002aaaad1232f9 in ac_create_resume (frame=0x2aaaac33fb94, this=0x635b88, loc=0x2aaaac77957c, flags=2, mode=1285, fd=0x2aaab0f3f64c, params=0x2aaab471b9a8)
    at access-control.c:1164
	_new = (call_frame_t *) 0x2aaaac341ea4
	old_THIS = (xlator_t *) 0x635b88
	tmp_cbk = (fop_create_cbk_t) 0x2aaaaace7002 <default_create_cbk>
	__FUNCTION__ = "ac_create_resume"
#9  0x00002aaaaacfcf83 in call_resume_wind (stub=0x2aaaac779544) at call-stub.c:2127
	__FUNCTION__ = "call_resume_wind"
#10 0x00002aaaaad03d6b in call_resume (stub=0x2aaaac779544) at call-stub.c:3870
	old_THIS = (xlator_t *) 0x635b88
	__FUNCTION__ = "call_resume"
#11 0x00002aaaad1233bf in ac_create_stat_cbk (frame=0x2aaaac33fb94, cookie=0x2aaaac341034, this=0x635b88, op_ret=0, op_errno=0, buf=0x42d07db0)
    at access-control.c:1189
	stub = (call_stub_t *) 0x2aaaac779544
	__FUNCTION__ = "ac_create_stat_cbk"
#12 0x00002aaaacf0203e in posix_stat (frame=0x2aaaac341034, this=0x6348d8, loc=0x42d07eb0) at posix.c:518
	fn = (fop_stat_cbk_t) 0x2aaaad123319 <ac_create_stat_cbk>
	_parent = (call_frame_t *) 0x2aaaac33fb94
	old_THIS = (xlator_t *) 0x6348d8
	buf = {ia_ino = 17548678, ia_gfid = {107 'k', 4 '\004', 232 '\350', 38 '&', 137 '\211', 36 '$', 66 'B', 131 '\203', 171 '\253', 157 '\235', 186 '\272', 227 '\343', 
    139 '\213', 179 '\263', 186 '\272', 54 '6'}, ia_dev = 2064, ia_type = IA_IFDIR, ia_prot = {suid = 0 '\0', sgid = 0 '\0', sticky = 0 '\0', owner = {read = 1 '\001', 
      write = 1 '\001', exec = 1 '\001'}, group = {read = 1 '\001', write = 0 '\0', exec = 1 '\001'}, other = {read = 1 '\001', write = 0 '\0', exec = 1 '\001'}}, 
  ia_nlink = 3, ia_uid = 0, ia_gid = 0, ia_rdev = 0, ia_size = 4096, ia_blksize = 4096, ia_blocks = 16, ia_atime = 1284792398, ia_atime_nsec = 0, 
  ia_mtime = 1284792806, ia_mtime_nsec = 0, ia_ctime = 1284792806, ia_ctime_nsec = 0}
	real_path = 0x42d07d10 "/mnt/dir1//dir1/nfsalpha200/ip-10-243-113-224/test1"
	op_ret = 0
	op_errno = 0
	priv = (struct posix_private *) 0x645158
	__FUNCTION__ = "posix_stat"
#13 0x00002aaaad123884 in ac_create (frame=0x2aaaac33fb94, this=0x635b88, loc=0x2aaaac77c2c4, flags=2, mode=1285, fd=0x2aaab0f3f64c, params=0x2aaab471b9a8)
    at access-control.c:1224
	_new = (call_frame_t *) 0x2aaaac341034
---Type <return> to continue, or q <return> to quit---
	old_THIS = (xlator_t *) 0x635b88
	tmp_cbk = (fop_stat_cbk_t) 0x2aaaad123319 <ac_create_stat_cbk>
	stub = (call_stub_t *) 0x2aaaac779544
	ret = 0
	parentloc = {path = 0x2aaab522de68 "/dir1/nfsalpha200/ip-10-243-113-224/test1", name = 0x2aaab522de8c "test1", ino = 17548678, inode = 0x2aaaae0e2714, 
  parent = 0x2aaaae0e2678}
	__FUNCTION__ = "ac_create"
#14 0x00002aaaad330ff3 in pl_create (frame=0x2aaaac340e24, this=0x636da8, loc=0x2aaaac77c2c4, flags=2, mode=1285, fd=0x2aaab0f3f64c, params=0x2aaab471b9a8)
    at posix.c:471
	_new = (call_frame_t *) 0x2aaaac33fb94
	old_THIS = (xlator_t *) 0x636da8
	tmp_cbk = (fop_create_cbk_t) 0x2aaaad330b95 <pl_create_cbk>
	__FUNCTION__ = "pl_create"
#15 0x00002aaaad7d51d6 in pump_create (frame=0x2aaaac340770, this=0x6392c8, loc=0x2aaaac77c2c4, flags=2, mode=1285, fd=0x2aaab0f3f64c, params=0x2aaab471b9a8)
    at pump.c:1816
	_new = (call_frame_t *) 0x2aaaac340e24
	old_THIS = (xlator_t *) 0x6392c8
	tmp_cbk = (fop_create_cbk_t) 0x2aaaaace7002 <default_create_cbk>
	priv = (afr_private_t *) 0x640138
	__FUNCTION__ = "pump_create"
#16 0x00002aaaad9ef48b in iot_create_wrapper (frame=0x2aaaac33fda4, this=0x63a4d8, loc=0x2aaaac77c2c4, flags=2, mode=1285, fd=0x2aaab0f3f64c, params=0x2aaab471b9a8)
    at io-threads.c:746
	_new = (call_frame_t *) 0x2aaaac340770
	old_THIS = (xlator_t *) 0x63a4d8
	tmp_cbk = (fop_create_cbk_t) 0x2aaaad9ef0b2 <iot_create_cbk>
	__FUNCTION__ = "iot_create_wrapper"
#17 0x00002aaaaacfcf83 in call_resume_wind (stub=0x2aaaac77c28c) at call-stub.c:2127
	__FUNCTION__ = "call_resume_wind"
#18 0x00002aaaaad03d6b in call_resume (stub=0x2aaaac77c28c) at call-stub.c:3870
	old_THIS = (xlator_t *) 0x63a4d8
	__FUNCTION__ = "call_resume"
#19 0x00002aaaad9eae2e in iot_worker (data=0x63f378) at io-threads.c:118
	conf = (iot_conf_t *) 0x63f378
	this = (xlator_t *) 0x63a4d8
	stub = (call_stub_t *) 0x2aaaac77c28c
	sleep_till = {tv_sec = 1284792928, tv_nsec = 0}
---Type <return> to continue, or q <return> to quit---
	ret = 0
	timeout = 0 '\0'
	bye = 0 '\0'
	__FUNCTION__ = "iot_worker"

Comment 3 Amar Tumballi 2010-10-04 02:17:24 UTC
Checks have been added, so this crash should not happen. But I want to keep this bug open for some more time, as the root cause is not yet known. Since a workaround is already available and this crash is not seen regularly, I will reduce the severity and work on it post 3.1.0.

Comment 4 Amar Tumballi 2010-10-05 10:28:29 UTC
Lakshmi, can you try this with a newer release and see if it happens? IMO it should be fixed now.

Comment 5 Amar Tumballi 2010-11-09 02:56:45 UTC
Not seen after 3.1.0 GA. Please re-open if seen again.


Note You need to log in before you can comment on or make changes to this bug.