Bug 762586 (GLUSTER-854)

Summary: nfs server didn't start
Product: [Community] GlusterFS Reporter: Lakshmipathi G <lakshmipathi>
Component: nfsAssignee: Shehjar Tikoo <shehjart>
Status: CLOSED CURRENTRELEASE QA Contact:
Severity: medium Docs Contact:
Priority: high    
Version: nfs-alphaCC: anush, gluster-bugs
Target Milestone: ---   
Target Release: ---   
Hardware: All   
OS: Linux   
Whiteboard:
Fixed In Version: Doc Type: Bug Fix
Doc Text:
Story Points: ---
Clone Of: Environment:
Last Closed: Type: ---
Regression: RTP Mount Type: nfs
Documentation: --- CRM:
Verified Versions: Category: ---
oVirt Team: --- RHEL 7.3 requirements from Atomic Host:
Cloudforms Team: ---
Attachments:
Description Flags
nfs server log file with trace none

Description Lakshmipathi G 2010-04-25 02:24:46 EDT
While trying to set up a cluster with the cloud-seed program, the glusterfs-nfs server did not start. The following core file can be found under ec2-67-202-6-25.compute-1.amazonaws.com:/core.17851

(gdb) bt
#0  0x00002aaaabf26e29 in client_mkdir (frame=0x82c750, this=0x611450, loc=0x806c50, mode=493) at client-protocol.c:1027
#1  0x00002aaaac14c6cd in afr_mkdir_wind (frame=0x81bd68, this=<value optimized out>) at afr-dir-write.c:756
#2  0x00002aaaac160218 in afr_changelog_pre_op_cbk (frame=0x81bd68, cookie=<value optimized out>, this=0x613070, op_ret=<value optimized out>, op_errno=61, 
    xattr=<value optimized out>) at afr-transaction.c:919
#3  0x00002aaaabf1e0c4 in client_xattrop_cbk (frame=0x733ff0, hdr=<value optimized out>, hdrlen=<value optimized out>, iobuf=<value optimized out>)
    at client-protocol.c:3657
#4  0x00002aaaabf181ca in protocol_client_pollin (this=0x60f0f0, trans=0x620f10) at client-protocol.c:6391
#5  0x00002aaaabf26982 in notify (this=0x0, event=2, data=0x620f10) at client-protocol.c:6510
#6  0x00002aaaaacdb433 in xlator_notify (xl=0x60f0f0, event=2, data=0x620f10) at xlator.c:919
#7  0x00002aaaad438083 in socket_event_handler (fd=<value optimized out>, idx=2, data=0x620f10, poll_in=1, poll_out=0, poll_err=0) at socket.c:831
#8  0x00002aaaaacf64e5 in event_dispatch_epoll (event_pool=0x60a8b0) at event.c:804
#9  0x0000000000404377 in main (argc=8, argv=0x7fff8c8133e8) at glusterfsd.c:1494
(gdb) bt full
#0  0x00002aaaabf26e29 in client_mkdir (frame=0x82c750, this=0x611450, loc=0x806c50, mode=493) at client-protocol.c:1027
	hdr = <value optimized out>
	req = <value optimized out>
	ret = -1
	pathlen = 2
	baselen = 1
	par = 0
	gen = 0
	local = <value optimized out>
	__FUNCTION__ = "client_mkdir"
#1  0x00002aaaac14c6cd in afr_mkdir_wind (frame=0x81bd68, this=<value optimized out>) at afr-dir-write.c:756
	_new = (call_frame_t *) 0x0
	old_THIS = (xlator_t *) 0x613070
	local = (afr_local_t *) 0x806c20
	priv = (afr_private_t *) 0x61e210
	call_count = 2
#2  0x00002aaaac160218 in afr_changelog_pre_op_cbk (frame=0x81bd68, cookie=<value optimized out>, this=0x613070, op_ret=<value optimized out>, op_errno=61, 
    xattr=<value optimized out>) at afr-transaction.c:919
	local = (afr_local_t *) 0x806c20
	priv = (afr_private_t *) 0x61e210
	child_index = 1
	__FUNCTION__ = "afr_changelog_pre_op_cbk"
#3  0x00002aaaabf1e0c4 in client_xattrop_cbk (frame=0x733ff0, hdr=<value optimized out>, hdrlen=<value optimized out>, iobuf=<value optimized out>)
    at client-protocol.c:3657
	fn = (ret_fn_t) 0x2aaaac160160 <afr_changelog_pre_op_cbk>
	_parent = (call_frame_t *) 0x81bd68
	old_THIS = (xlator_t *) 0x60f0f0
	op_ret = 0
	op_errno = 61
	dict_len = <value optimized out>
	dict = (dict_t *) 0x9ad7e0
	ret = <value optimized out>
	dictbuf = 0x0
	__FUNCTION__ = "client_xattrop_cbk"
#4  0x00002aaaabf181ca in protocol_client_pollin (this=0x60f0f0, trans=0x620f10) at client-protocol.c:6391
	conf = (client_conf_t *) 0x620e90
	ret = 0
---Type <return> to continue, or q <return> to quit--- 
	iobuf = (struct iobuf *) 0x0
	hdr = 0x98e200 ""
	hdrlen = 270
#5  0x00002aaaabf26982 in notify (this=0x0, event=2, data=0x620f10) at client-protocol.c:6510
	ret = <value optimized out>
	child_down = <value optimized out>
	was_not_down = <value optimized out>
	trans = (transport_t *) 0x611450
	conn = <value optimized out>
	conf = (client_conf_t *) 0x620e90
	parent = <value optimized out>
	__FUNCTION__ = "notify"
#6  0x00002aaaaacdb433 in xlator_notify (xl=0x60f0f0, event=2, data=0x620f10) at xlator.c:919
	old_THIS = (xlator_t *) 0x2aaaaaf1c500
	ret = <value optimized out>
#7  0x00002aaaad438083 in socket_event_handler (fd=<value optimized out>, idx=2, data=0x620f10, poll_in=1, poll_out=0, poll_err=0) at socket.c:831
	this = (transport_t *) 0x0
	priv = (socket_private_t *) 0x621260
	ret = 0
#8  0x00002aaaaacf64e5 in event_dispatch_epoll (event_pool=0x60a8b0) at event.c:804
	events = (struct epoll_event *) 0x623640
	i = 1
	ret = 4
	__FUNCTION__ = "event_dispatch_epoll"
#9  0x0000000000404377 in main (argc=8, argv=0x7fff8c8133e8) at glusterfsd.c:1494
	ctx = (glusterfs_ctx_t *) 0x609010
	cmd_args = <value optimized out>
	stbuf = {st_dev = 140735550665520, st_ino = 140735550665440, st_nlink = 4131212846, st_mode = 2357277432, st_uid = 32767, st_gid = 0, pad0 = 0, 
  st_rdev = 46912496157250, st_size = 0, st_blksize = 46912508694904, st_blocks = 46909632806913, st_atim = {tv_sec = 0, tv_nsec = 46909632806913}, st_mtim = {
    tv_sec = 46912506351840, tv_nsec = 140735550665296}, st_ctim = {tv_sec = 0, tv_nsec = 750006350}, __unused = {191, 6652244560, 46912508694904}}
	tmp_logfile = '\0' <repeats 1023 times>
	tmp_logfile_dyn = <value optimized out>
	tmp_logfilebase = <value optimized out>
	timestr = '\0' <repeats 255 times>
	utime = 1272175175
	tm = <value optimized out>
	ret = 0
---Type <return> to continue, or q <return> to quit---
	lim = {rlim_cur = 18446744073709551615, rlim_max = 18446744073709551615}
	specfp = <value optimized out>
	graph = (xlator_t *) 0x0
	trav = <value optimized out>
	fuse_volume_found = 0
	xl_count = 0
	pipe_fd = {6, 7}
	gf_success = 0
	gf_failure = -1
	__FUNCTION__ = "main"
(gdb) 


log file:
=========
[2010-04-25 01:39:23] D [rpcsvc.c:1790:rpcsvc_request_create] rpc-service: RPC XID: 43a4d899, Ver: 2, Program: 100003, ProgVers: 3, Proc: 7
[2010-04-25 01:39:23] D [rpcsvc.c:1266:rpcsvc_program_actor] rpc-service: Actor found: NFS3 - WRITE
[2010-04-25 01:39:23] D [rpcsvc.c:1266:rpcsvc_program_actor] rpc-service: Actor found: NFS3 - WRITE
[2010-04-25 01:39:23] D [rpcsvc.c:1266:rpcsvc_program_actor] rpc-service: Actor found: NFS3 - WRITE
[2010-04-25 01:39:23] D [rpcsvc.c:1266:rpcsvc_program_actor] rpc-service: Actor found: NFS3 - WRITE
/lib64/libc.so.6[0x2aaaab383280]
/opt/glusterfs/gnfs-git/lib/glusterfs/3.0.0git/xlator/protocol/client.so(client_mkdir+0xa9)[0x2aaaabf26e29]
/opt/glusterfs/gnfs-git/lib/glusterfs/3.0.0git/xlator/cluster/replicate.so(afr_mkdir_wind+0x17d)[0x2aaaac14c6cd]
/opt/glusterfs/gnfs-git/lib/glusterfs/3.0.0git/xlator/cluster/replicate.so(afr_changelog_pre_op_cbk+0xb8)[0x2aaaac160218]
/opt/glusterfs/gnfs-git/lib/glusterfs/3.0.0git/xlator/protocol/client.so(client_xattrop_cbk+0x134)[0x2aaaabf1e0c4]
/opt/glusterfs/gnfs-git/lib/glusterfs/3.0.0git/xlator/protocol/client.so(protocol_client_pollin+0xca)[0x2aaaabf181ca]
/opt/glusterfs/gnfs-git/lib/glusterfs/3.0.0git/xlator/protocol/client.so(notify+0x212)[0x2aaaabf26982]
/opt/glusterfs/gnfs-git/lib/libglusterfs.so.0(xlator_notify+0x43)[0x2aaaaacdb433]
/opt/glusterfs/gnfs-git/lib/glusterfs/3.0.0git/transport/socket.so(socket_event_handler+0xd3)[0x2aaaad438083]
/opt/glusterfs/gnfs-git/lib/libglusterfs.so.0[0x2aaaaacf64e5]
/opt/glusterfs/gnfs-git/sbin/glusterfsd(main+0xb17)[0x404377]
/lib64/libc.so.6(__libc_start_main+0xf4)[0x2aaaab370974]
/opt/glusterfs/gnfs-git/sbin/glusterfsd[0x4027b9]
---------
Comment 1 Lakshmipathi G 2010-04-25 23:01:35 EDT
Created attachment 186 [details]
modified specs and patches for the latest cdrecord

log file with trace
Comment 2 Lakshmipathi G 2010-04-25 23:02:44 EDT
file : /core.3157

(gdb) bt 
#0  0x00002aaaabf385fa in client_mkdir (frame=0x6fd550, this=0x613540, loc=0x6f9b20, mode=493) at client-protocol.c:1027
#1  0x00002aaaac17017d in afr_mkdir_wind (frame=0x706b18, this=0x615110) at afr-dir-write.c:756
#2  0x00002aaaac187623 in afr_changelog_pre_op_cbk (frame=0x706b18, cookie=0x0, this=0x615110, op_ret=0, op_errno=61, xattr=0x70cc70) at afr-transaction.c:919
#3  0x00002aaaabf4374c in client_xattrop_cbk (frame=0x6e0280, hdr=0x706130, hdrlen=270, iobuf=0x0) at client-protocol.c:3657
#4  0x00002aaaabf4b55c in protocol_client_interpret (this=0x613540, trans=0x621940, hdr_p=0x706130 "", hdrlen=270, iobuf=0x0) at client-protocol.c:6093
#5  0x00002aaaabf4c222 in protocol_client_pollin (this=0x613540, trans=0x621940) at client-protocol.c:6391
#6  0x00002aaaabf4c899 in notify (this=0x613540, event=2, data=0x621940) at client-protocol.c:6510
#7  0x00002aaaaace0b7c in xlator_notify (xl=0x613540, event=2, data=0x621940) at xlator.c:919
#8  0x00002aaaad49ae96 in socket_event_poll_in (this=0x621940) at socket.c:731
#9  0x00002aaaad49b18b in socket_event_handler (fd=9, idx=0, data=0x621940, poll_in=1, poll_out=0, poll_err=0) at socket.c:831
#10 0x00002aaaaad072b9 in event_dispatch_epoll_handler (event_pool=0x60c8b0, events=0x625910, i=1) at event.c:804
#11 0x00002aaaaad0748e in event_dispatch_epoll (event_pool=0x60c8b0) at event.c:867
#12 0x00002aaaaad077a4 in event_dispatch (event_pool=0x60c8b0) at event.c:975
#13 0x0000000000406344 in main (argc=8, argv=0x7fffafdda9c8) at glusterfsd.c:1494
(gdb) bt full
#0  0x00002aaaabf385fa in client_mkdir (frame=0x6fd550, this=0x613540, loc=0x6f9b20, mode=493) at client-protocol.c:1027
	hdr = (gf_hdr_common_t *) 0x0
	req = (gf_fop_mkdir_req_t *) 0x0
	hdrlen = 18446744073709551615
	ret = -1
	pathlen = 2
	baselen = 1
	par = 0
	gen = 0
	local = (client_local_t *) 0x70ab50
	__FUNCTION__ = "client_mkdir"
#1  0x00002aaaac17017d in afr_mkdir_wind (frame=0x706b18, this=0x615110) at afr-dir-write.c:756
	_new = (call_frame_t *) 0x6fd550
	old_THIS = (xlator_t *) 0x615110
	tmp_cbk = (fop_mkdir_cbk_t) 0x2aaaac16fa04 <afr_mkdir_wind_cbk>
	local = (afr_local_t *) 0x6f9af0
	priv = (afr_private_t *) 0x6204a0
	call_count = 2
	i = 0
#2  0x00002aaaac187623 in afr_changelog_pre_op_cbk (frame=0x706b18, cookie=0x0, this=0x615110, op_ret=0, op_errno=61, xattr=0x70cc70) at afr-transaction.c:919
	local = (afr_local_t *) 0x6f9af0
	priv = (afr_private_t *) 0x6204a0
	loc = (loc_t *) 0x6f9b20
	call_count = 0
	child_index = 0
	__FUNCTION__ = "afr_changelog_pre_op_cbk"
#3  0x00002aaaabf4374c in client_xattrop_cbk (frame=0x6e0280, hdr=0x706130, hdrlen=270, iobuf=0x0) at client-protocol.c:3657
	fn = (ret_fn_t) 0x2aaaac1873e5 <afr_changelog_pre_op_cbk>
	_parent = (call_frame_t *) 0x706b18
	old_THIS = (xlator_t *) 0x613540
	rsp = (gf_fop_xattrop_rsp_t *) 0x70619c
	op_ret = 0
	gf_errno = 61
	op_errno = 61
	dict_len = 157
	dict = (dict_t *) 0x70cc70
	ret = 0
---Type <return> to continue, or q <return> to quit--- 
	dictbuf = 0x0
	__FUNCTION__ = "client_xattrop_cbk"
#4  0x00002aaaabf4b55c in protocol_client_interpret (this=0x613540, trans=0x621940, hdr_p=0x706130 "", hdrlen=270, iobuf=0x0) at client-protocol.c:6093
	ret = -1
	frame = (call_frame_t *) 0x6e0280
	hdr = (gf_hdr_common_t *) 0x706130
	callid = 7
	type = 4
	op = 35
	conn = (client_connection_t *) 0x621d60
	__FUNCTION__ = "protocol_client_interpret"
#5  0x00002aaaabf4c222 in protocol_client_pollin (this=0x613540, trans=0x621940) at client-protocol.c:6391
	conf = (client_conf_t *) 0x6218c0
	ret = 0
	iobuf = (struct iobuf *) 0x0
	hdr = 0x706130 ""
	hdrlen = 270
#6  0x00002aaaabf4c899 in notify (this=0x613540, event=2, data=0x621940) at client-protocol.c:6510
	i = 0
	ret = -1
	child_down = 1
	was_not_down = 0
	trans = (transport_t *) 0x621940
	conn = (client_connection_t *) 0x0
	conf = (client_conf_t *) 0x6218c0
	parent = (xlator_list_t *) 0x0
	__FUNCTION__ = "notify"
#7  0x00002aaaaace0b7c in xlator_notify (xl=0x613540, event=2, data=0x621940) at xlator.c:919
	old_THIS = (xlator_t *) 0x2aaaaaf34640
	ret = 0
#8  0x00002aaaad49ae96 in socket_event_poll_in (this=0x621940) at socket.c:731
	ret = 0
#9  0x00002aaaad49b18b in socket_event_handler (fd=9, idx=0, data=0x621940, poll_in=1, poll_out=0, poll_err=0) at socket.c:831
	this = (transport_t *) 0x621940
	priv = (socket_private_t *) 0x621c90
	ret = 0
#10 0x00002aaaaad072b9 in event_dispatch_epoll_handler (event_pool=0x60c8b0, events=0x625910, i=1) at event.c:804
---Type <return> to continue, or q <return> to quit---
	event_data = (struct event_data *) 0x625920
	handler = (event_handler_t) 0x2aaaad49b0d4 <socket_event_handler>
	data = (void *) 0x621940
	idx = 0
	ret = -1
	__FUNCTION__ = "event_dispatch_epoll_handler"
#11 0x00002aaaaad0748e in event_dispatch_epoll (event_pool=0x60c8b0) at event.c:867
	events = (struct epoll_event *) 0x625910
	size = 6
	i = 1
	ret = 0
	__FUNCTION__ = "event_dispatch_epoll"
#12 0x00002aaaaad077a4 in event_dispatch (event_pool=0x60c8b0) at event.c:975
	ret = -1
	__FUNCTION__ = "event_dispatch"
#13 0x0000000000406344 in main (argc=8, argv=0x7fffafdda9c8) at glusterfsd.c:1494
	ctx = (glusterfs_ctx_t *) 0x60b010
	cmd_args = (cmd_args_t *) 0x60b010
	pool = (call_pool_t *) 0x60b3d0
	stbuf = {st_dev = 750006350, st_ino = 191, st_nlink = 7245506608, st_mode = 2875986296, st_uid = 10922, st_gid = 2863428448, pad0 = 10922, 
  st_rdev = 140736143927616, st_size = 46912496234496, st_blksize = 4199511, st_blocks = 46909632806990, st_atim = {tv_sec = 6442450944, tv_nsec = 0}, st_mtim = {
    tv_sec = 0, tv_nsec = 0}, st_ctim = {tv_sec = 5658608, tv_nsec = 191}, __unused = {5658608, 140736143927478, 140736143927479}}
	tmp_logfile = '\0' <repeats 1023 times>
	tmp_logfile_dyn = 0x0
	tmp_logfilebase = 0x0
	timestr = '\0' <repeats 255 times>
	utime = 1272260943
	tm = (struct tm *) 0x0
	ret = 0
	lim = {rlim_cur = 18446744073709551615, rlim_max = 18446744073709551615}
	specfp = (FILE *) 0x60cbe0
	graph = (xlator_t *) 0x61dcf0
	trav = (xlator_t *) 0x0
	fuse_volume_found = 0
	xl_count = 13
	process_mode = 0 '\0'
	pipe_fd = {6, 7}
---Type <return> to continue, or q <return> to quit---
	gf_success = 0
	gf_failure = -1
	__FUNCTION__ = "main"


----
Comment 3 Shehjar Tikoo 2010-04-26 01:10:43 EDT
The source line of the crash says loc->parent->ino is being dereferenced when loc->parent is NULL. 

Looking at the core, the backtrace is not a self-heal backtrace, because it contains pre_op_cbk, "...which only occurs during a normal mkdir" (Vikas).

Further investigation into the log shows that we're ending up with a:
[2010-04-26 01:49:23] T [nfs-fops.c:674:nfs_fop_mkdir] nfs: Mkdir: /

mkdir on root. That is the reason why we find client_mkdir looking into a NULL loc->parent. The last mkdir seen is:
=======================
[2010-04-26 01:49:23] D [nfs3-helpers.c:2070:nfs3_log_fh_entry_call] nfs-nfsv3: XID: 788301f0, MKDIR: args: FH: hashcount 2, xlid 0, gen 5463637784256338392, ino 126288616, name: fileop_dir_0_0_13
[2010-04-26 01:49:23] T [nfs3.c:2342:nfs3_mkdir] nfs-nfsv3: FH to Volume: statprefetch
[2010-04-26 01:49:23] T [nfs3-helpers.c:2766:nfs3_fh_resolve_entry_hard] nfs-nfsv3: FH hard resolution: ino: 126288616, gen: 5463637784256338392, entry: fileop_dir_0_0_13, hashidx: 0
[2010-04-26 01:49:23] T [nfs3-helpers.c:2780:nfs3_fh_resolve_entry_hard] nfs-nfsv3: Entry needs parent lookup: (null)
[2010-04-26 01:49:23] T [nfs3-helpers.c:2732:nfs3_fh_resolve_inode_hard] nfs-nfsv3: FH hard resolution: ino: 126288616, gen: 5463637784256338392, hashidx: 1
[2010-04-26 01:49:23] T [nfs3-helpers.c:2737:nfs3_fh_resolve_inode_hard] nfs-nfsv3: Dir will be opened: /

=========================

The fh resolution results in a state where the parent inode of the given parent directory, i.e. the grandparent of the directory to be created, is not available through the inode table and needs to be deep-resolved. This is what is meant by:

[2010-04-26 01:49:23] T [nfs3-helpers.c:2780:nfs3_fh_resolve_entry_hard] nfs-nfsv3: Entry needs parent lookup: (null)

From here it is getting stuck in a readdir loop on /, where root directory is empty, going by the contents being returned.

Later, the readdir has finished reading the directory and calls the resume function with an ENOENT, as shown by:

[2010-04-26 01:49:23] T [nfs3-helpers.c:2694:nfs3_fh_resolve_readdir_cbk] nfs-nfsv3: Directory read done: /: Success
[2010-04-26 01:49:23] T [nfs.c:407:nfs_user_create] nfs: uid: 0, gid 0, gids: 0
[2010-04-26 01:49:23] T [nfs-fops.c:674:nfs_fop_mkdir] nfs: Mkdir: /

Yet, nfs3_mkdir_resume is not able to catch the error that one of the components was not resolved. This is because the macro nfs3_check_new_fh_resolve_status does not consider ENOENT an error message when a new entry is being created since it is correct for the fh resolution to return ENOENT on not finding the fh for the entry being created. The problem, however, is that this behaviour does not account for an ENOENT for a component leading up to the new entry.

There are two problems here:

1. The code path that resolves an ancestor of the parent of a new entry contains a bug.
2. The code path that signals ENOENT to the resume functions needs to differentiate between an ENOENT for the last component in the path and an ENOENT for an intermediate component. In the latter case, the return should be an ESTALE.
Comment 4 Anush Shetty 2010-04-26 01:26:02 EDT
Fileop command used: fileop -f 100 -t
Comment 5 Shehjar Tikoo 2010-04-26 01:41:35 EDT
Further investigation reveals that the server was probably killed in the middle
of a prior NFS test which resulted in pending NFS requests on the clients. The
server was then restarted after cleaning up the backends. When the retransmits
happened from the clients, the ancestor directories were not present. This is
why readdir always returns just . and ..

This could also be the reason why some of the WRITE requests are returning
ENOENT.
(TODO: This should be changed to return ESTALE.)

The fix is to make NFS server handle the problems mentioned earlier gracefully.
Comment 6 Shehjar Tikoo 2010-04-26 01:43:27 EDT
The work-around is to reboot the test clients to flush any pending request state to prevent a retransmit, especially after the server was killed and the backend test directories cleaned.
Comment 7 Anand Avati 2010-05-25 23:31:44 EDT
PATCH: http://patches.gluster.com/patch/3157 in release-3.0 (stat-prefetch: Remove checks for loc->parent)
Comment 8 Shehjar Tikoo 2010-07-15 03:24:13 EDT
Does not crash nfs server when run in stand-alone mode without protocol/clients in the tree.

The error seen in this case is:
[2010-07-15 06:16:39] D [rpcsvc.c:1266:rpcsvc_program_actor] rpc-service: Actor found: NFS3 - MKDIR
[2010-07-15 06:16:39] D [nfs3-helpers.c:2071:nfs3_log_fh_entry_call] nfs-nfsv3: XID: fc1c8255, MKDIR: args: FH: hashcount 6, xlid 0, gen 5494074632640135174, ino 36323335, name: test7
[2010-07-15 06:16:39] T [nfs3.c:2342:nfs3_mkdir] nfs-nfsv3: FH to Volume: posix
[2010-07-15 06:16:39] T [nfs3-helpers.c:2772:nfs3_fh_resolve_entry_hard] nfs-nfsv3: FH hard resolution: ino: 36323335, gen: 5494074632640135174, entry: test7, hashidx: 0

#############################################################
######### Needs graceful handling ###########################
#############################################################
[2010-07-15 06:16:39] T [nfs3-helpers.c:2786:nfs3_fh_resolve_entry_hard] nfs-nfsv3: Entry needs parent lookup: (null)
[2010-07-15 06:16:39] T [nfs3-helpers.c:2738:nfs3_fh_resolve_inode_hard] nfs-nfsv3: FH hard resolution: ino: 36323335, gen: 5494074632640135174, hashidx: 1
[2010-07-15 06:16:39] T [nfs3-helpers.c:2743:nfs3_fh_resolve_inode_hard] nfs-nfsv3: Dir will be opened: /
[2010-07-15 06:16:39] T [nfs-fops.c:394:nfs_fop_opendir] nfs: Opendir: /
[2010-07-15 06:16:39] T [access-control.c:205:ac_test_access] access-control: Root has access
[2010-07-15 06:16:39] T [nfs3-helpers.c:2536:nfs3_fh_resolve_opendir_cbk] nfs-nfsv3: Reading directory: /
[2010-07-15 06:16:39] T [nfs-fops.c:483:nfs_fop_readdirp] nfs: readdir
[2010-07-15 06:16:39] T [nfs3-helpers.c:2677:nfs3_fh_resolve_search_dir] nfs-nfsv3: Candidate: ., ino: 36323329, gen: 5479934054113476609
[2010-07-15 06:16:39] T [nfs3-helpers.c:2677:nfs3_fh_resolve_search_dir] nfs-nfsv3: Candidate: .., ino: 32243713, gen: 5479934402005827585
[2010-07-15 06:16:39] T [nfs-fops.c:483:nfs_fop_readdirp] nfs: readdir
[2010-07-15 06:16:39] T [nfs3-helpers.c:2700:nfs3_fh_resolve_readdir_cbk] nfs-nfsv3: Directory read done: /: Success
[2010-07-15 06:16:39] T [nfs.c:407:nfs_user_create] nfs: uid: 0, gid 0, gids: 0
[2010-07-15 06:16:39] T [nfs-fops.c:674:nfs_fop_mkdir] nfs: Mkdir: /

######################################################################
[2010-07-15 06:16:39] D [nfs3-helpers.c:2244:nfs3_log_newfh_res] nfs-nfsv3: XID: fc1c8255, MKDIR: NFS: 10006(Error occurred on the server or IO Error), POSIX: 14(Bad address), FH: hashcount 0, xlid 0, gen 0, ino 0
######################################################################


The client operations that reproduce this are:
[root@domU-12-31-39-0E-B1-16 shehjart]# cat /proc/mounts
....
....
....
domU-12-31-39-10-6D-E2:/posix /mnt/shehjart/mount nfs rw,vers=3,rsize=65536,wsize=65536,acregmin=300,acregmax=300,acdirmin=300,acdirmax=300,hard,intr,proto=tcp,timeo=600,retrans=2,s0

[root@domU-12-31-39-0E-B1-16 shehjart]# mkdir -p mount/test1/test3/test3/test4/test5/test6/
[root@domU-12-31-39-0E-B1-16 shehjart]# ls -lR mount
mount:
total 8
drwxr-xr-x 3 root root 4096 2010-07-15 06:16 test1

mount/test1:
total 8
drwxr-xr-x 3 root root 4096 2010-07-15 06:16 test3

mount/test1/test3:
total 8
drwxr-xr-x 3 root root 4096 2010-07-15 06:16 test3

mount/test1/test3/test3:
total 8
drwxr-xr-x 3 root root 4096 2010-07-15 06:16 test4

mount/test1/test3/test3/test4:
total 8
drwxr-xr-x 3 root root 4096 2010-07-15 06:16 test5

mount/test1/test3/test3/test4/test5:
total 4
drwxr-xr-x 2 root root 4096 2010-07-15 06:16 test6

mount/test1/test3/test3/test4/test5/test6:
total 0

#############################################################
###### Stop NFS server, clean up backend dir, and restart ###
###### NFS server                                         ###

[root@domU-12-31-39-0E-B1-16 shehjart]# mkdir mount/test1/test3/test3/test4/test5/test6/test7
mkdir: cannot create directory `mount/test1/test3/test3/test4/test5/test6/test7': Unknown error 526

This is over simple posix+io-threads vol config.
Comment 9 Shehjar Tikoo 2010-07-15 03:25:46 EDT
First part of the bug is in NFS which should handle the given situation gracefully, the second part is in protocol/client which crashes.
Comment 10 Shehjar Tikoo 2010-07-15 04:00:43 EDT
The client crash itself looks like the problem described in bug 762707.
Comment 11 Shehjar Tikoo 2010-07-15 04:22:56 EDT
(In reply to comment #8)
> Does not crash nfs server when run in stand-alone mode without protocol/clients
> in the tree.
> 
> The error seen in this case is:
> [2010-07-15 06:16:39] D [rpcsvc.c:1266:rpcsvc_program_actor] rpc-service: Actor
> found: NFS3 - MKDIR
> [2010-07-15 06:16:39] D [nfs3-helpers.c:2071:nfs3_log_fh_entry_call] nfs-nfsv3:
> XID: fc1c8255, MKDIR: args: FH: hashcount 6, xlid 0, gen 5494074632640135174,
> ino 36323335, name: test7
> [2010-07-15 06:16:39] T [nfs3.c:2342:nfs3_mkdir] nfs-nfsv3: FH to Volume: posix
> [2010-07-15 06:16:39] T [nfs3-helpers.c:2772:nfs3_fh_resolve_entry_hard]
> nfs-nfsv3: FH hard resolution: ino: 36323335, gen: 5494074632640135174, entry:
> test7, hashidx: 0
> 
> #############################################################
> ######### Needs graceful handling ###########################
> #############################################################
> [2010-07-15 06:16:39] T [nfs3-helpers.c:2786:nfs3_fh_resolve_entry_hard]
> nfs-nfsv3: Entry needs parent lookup: (null)
> [2010-07-15 06:16:39] T [nfs3-helpers.c:2738:nfs3_fh_resolve_inode_hard]
> nfs-nfsv3: FH hard resolution: ino: 36323335, gen: 5494074632640135174,
> hashidx: 1
> [2010-07-15 06:16:39] T [nfs3-helpers.c:2743:nfs3_fh_resolve_inode_hard]
> nfs-nfsv3: Dir will be opened: /
> [2010-07-15 06:16:39] T [nfs-fops.c:394:nfs_fop_opendir] nfs: Opendir: /
> [2010-07-15 06:16:39] T [access-control.c:205:ac_test_access] access-control:
> Root has access
> [2010-07-15 06:16:39] T [nfs3-helpers.c:2536:nfs3_fh_resolve_opendir_cbk]
> nfs-nfsv3: Reading directory: /
> [2010-07-15 06:16:39] T [nfs-fops.c:483:nfs_fop_readdirp] nfs: readdir
> [2010-07-15 06:16:39] T [nfs3-helpers.c:2677:nfs3_fh_resolve_search_dir]
> nfs-nfsv3: Candidate: ., ino: 36323329, gen: 5479934054113476609
> [2010-07-15 06:16:39] T [nfs3-helpers.c:2677:nfs3_fh_resolve_search_dir]
> nfs-nfsv3: Candidate: .., ino: 32243713, gen: 5479934402005827585
> [2010-07-15 06:16:39] T [nfs-fops.c:483:nfs_fop_readdirp] nfs: readdir
> [2010-07-15 06:16:39] T [nfs3-helpers.c:2700:nfs3_fh_resolve_readdir_cbk]
> nfs-nfsv3: Directory read done: /: Success
> [2010-07-15 06:16:39] T [nfs.c:407:nfs_user_create] nfs: uid: 0, gid 0, gids: 0
> [2010-07-15 06:16:39] T [nfs-fops.c:674:nfs_fop_mkdir] nfs: Mkdir: /
> 
> ######################################################################
> [2010-07-15 06:16:39] D [nfs3-helpers.c:2244:nfs3_log_newfh_res] nfs-nfsv3:
> XID: fc1c8255, MKDIR: NFS: 10006(Error occurred on the server or IO Error),
> POSIX: 14(Bad address), FH: hashcount 0, xlid 0, gen 0, ino 0
> ######################################################################
> 
The error is actually coming from storage/posix and that is perfectly fine. The real problem is that we end up sending a MKDIR on /. The correct logic should have prevented this from happening.

int32_t                                                                         
nfs3_fh_resolve_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, 
                             int32_t op_ret, int32_t op_errno,                  
                             gf_dirent_t *entries)                              
{                                                                               
        nfs3_call_state_t       *cs = NULL;                                     
                                                                                
        cs = frame->local;                                                      
        if (op_ret <= 0) {                                                      
                gf_log (GF_NFS3, GF_LOG_TRACE, "Directory read done: %s: %s",   
                        cs->resolvedloc.path, strerror (op_ret));               
                cs->resolve_ret = -1;                                           
                cs->resolve_errno = ENOENT;
                                     
                nfs3_call_resume (cs);                                          
                goto err;                                                       
        }                                                                       


############################################################3
The problem in the code snippet is that it does not help nfs3_call_resume to differentiate between an ENOENT for a leaf dirent and a non-leaf dirent. In the case of MKDIR, the following is used to check the status of the fh/entry resolution process:

#define nfs3_check_new_fh_resolve_status(cst, nfstat, erlabl)           \
        do {                                                            \
                if (((cst)->resolve_ret == -1) &&                       \
                    ((cst)->resolve_errno != ENOENT)) {                 \
                        nfstat = nfs3_errno_to_nfsstat3 (cs->resolve_errno);\
                        goto erlabl;                                    \
                }                                                       \
        } while (0)         

The check for ENOENT assumes that the ENOENT is applicable only to the leaf node, or IOW, the directory being created. This does not hold in the case where the ENOENT was in fact returned to signify a failure to find a non-leaf node. Believing that only the dir/leaf node does not exist, the mkdir code proceeds to create the directory, assuming that the resolver state has filled up the locs for the new directory. This is also not true, because the resolution stops after reading the root dir, so the resolver state's loc contains the path only up to / (root). This results in mkdir passing straight through to the subvolumes.
                                                  


> 
> The client operations that reproduce this are:
> [root@domU-12-31-39-0E-B1-16 shehjart]# cat /proc/mounts
> ....
> ....
> ....
> domU-12-31-39-10-6D-E2:/posix /mnt/shehjart/mount nfs
> rw,vers=3,rsize=65536,wsize=65536,acregmin=300,acregmax=300,acdirmin=300,acdirmax=300,hard,intr,proto=tcp,timeo=600,retrans=2,s0
> 
> [root@domU-12-31-39-0E-B1-16 shehjart]# mkdir -p
> mount/test1/test3/test3/test4/test5/test6/
> [root@domU-12-31-39-0E-B1-16 shehjart]# ls -lR mount
> mount:
> total 8
> drwxr-xr-x 3 root root 4096 2010-07-15 06:16 test1
> 
> mount/test1:
> total 8
> drwxr-xr-x 3 root root 4096 2010-07-15 06:16 test3
> 
> mount/test1/test3:
> total 8
> drwxr-xr-x 3 root root 4096 2010-07-15 06:16 test3
> 
> mount/test1/test3/test3:
> total 8
> drwxr-xr-x 3 root root 4096 2010-07-15 06:16 test4
> 
> mount/test1/test3/test3/test4:
> total 8
> drwxr-xr-x 3 root root 4096 2010-07-15 06:16 test5
> 
> mount/test1/test3/test3/test4/test5:
> total 4
> drwxr-xr-x 2 root root 4096 2010-07-15 06:16 test6
> 
> mount/test1/test3/test3/test4/test5/test6:
> total 0
> 
> #############################################################
> ###### Stop NFS server, clean up backend dir, and restart ###
> ###### NFS server                                         ###
> 
> [root@domU-12-31-39-0E-B1-16 shehjart]# mkdir
> mount/test1/test3/test3/test4/test5/test6/test7
> mkdir: cannot create directory
> `mount/test1/test3/test3/test4/test5/test6/test7': Unknown error 526
> 
> This is over simple posix+io-threads vol config.
Comment 12 Shehjar Tikoo 2010-07-27 05:48:18 EDT
Regression test:
http://test.gluster.com/show_bug.cgi?id=88
Comment 13 Anand Avati 2010-07-28 02:24:39 EDT
PATCH: http://patches.gluster.com/patch/3919 in master (nfs3: Stop ESTALE from falling into SERVERFAULT)
Comment 14 Anand Avati 2010-07-28 02:24:44 EDT
PATCH: http://patches.gluster.com/patch/3920 in master (nfs3: Return ESTALE when fh resolver exhausts dirents)
Comment 15 Anand Avati 2010-07-28 02:24:48 EDT
PATCH: http://patches.gluster.com/patch/3921 in master (nfs3: Error returns must check for <0, not == -1)