Bug 825419

Summary: Starting gluster volumes crashes on rdma
Product: [Community] GlusterFS Reporter: Anush Shetty <ashetty>
Component: rdma Assignee: Raghavendra G <rgowdapp>
Status: CLOSED NOTABUG QA Contact:
Severity: high Docs Contact:
Priority: unspecified    
Version: 3.3.0 CC: gluster-bugs
Target Milestone: ---   
Target Release: ---   
Hardware: Unspecified   
OS: Unspecified   
Whiteboard:
Fixed In Version: Doc Type: Bug Fix
Doc Text:
Story Points: ---
Clone Of: Environment:
Last Closed: 2012-06-01 07:33:46 EDT Type: Bug
Regression: --- Mount Type: ---
Documentation: --- CRM:
Verified Versions: Category: ---
oVirt Team: --- RHEL 7.3 requirements from Atomic Host:

Description Anush Shetty 2012-05-26 01:04:01 EDT
Description of problem: After creating a volume, starting the gluster volume fails on rdma. The child process forked to start glusterfsd crashes.



Version-Release number of selected component (if applicable): release-3.3 with http://review.gluster.com/#change,3447 and http://review.gluster.com/#change,3448


How reproducible: Consistently


Steps to Reproduce:
1. gluster volume start volume-name
2.
3.
  
Actual results:

[2012-05-26 00:56:25.919832] E [glusterd-utils.c:3417:glusterd_brick_start] 0-: Unable to start glusterfs, ret: -1
[2012-05-26 00:56:25.919903] E [glusterd-op-sm.c:2324:glusterd_op_ac_send_commit_op] 0-management: Commit failed
[2012-05-26 00:56:25.919940] I [glusterd-op-sm.c:2254:glusterd_op_modify_op_ctx] 0-management: op_ctx modification not required
[2012-05-26 00:56:25.920293] I [glusterd-rpc-ops.c:606:glusterd3_1_cluster_unlock_cbk] 0-glusterd: Received ACC from uuid: 0743f6bd-a928-4069-9fca-7625b2e090d3
[2012-05-26 00:56:25.920338] I [glusterd-rpc-ops.c:606:glusterd3_1_cluster_unlock_cbk] 0-glusterd: Received ACC from uuid: e5472700-975c-44d5-bd13-1812d0a28e4f
[2012-05-26 00:56:25.920372] I [glusterd-rpc-ops.c:606:glusterd3_1_cluster_unlock_cbk] 0-glusterd: Received ACC from uuid: b06d6d38-8164-49dc-8365-0aad7e728ccf
[2012-05-26 00:56:25.920403] I [glusterd-op-sm.c:2627:glusterd_op_txn_complete] 0-glusterd: Cleared local lock
...skipping...
setfsid 1
spinlock 1
epoll.h 1
xattr.h 1
st_atim.tv_nsec 1
package-string: glusterfs 3.3git
/lib64/libc.so.6[0x317e232900]
/lib64/libc.so.6(__libc_fork+0x118)[0x317e2aace8]
/usr/local/lib/libglusterfs.so.0(runner_start+0x1e9)[0x7f31e87a2309]
/usr/local/lib/libglusterfs.so.0(+0x68894)[0x7f31e87a2894]
/usr/local/lib/libglusterfs.so.0(runner_run+0x22)[0x7f31e87a28e0]
/usr/local/lib/glusterfs/3.3git/xlator/mgmt/glusterd.so(glusterd_volume_start_glusterfs+0xb1c)[0x7f31e4f880c6]
/usr/local/lib/glusterfs/3.3git/xlator/mgmt/glusterd.so(glusterd_brick_start+0x1df)[0x7f31e4f90057]
/usr/local/lib/glusterfs/3.3git/xlator/mgmt/glusterd.so(glusterd_op_start_volume+0xa4)[0x7f31e4fcefe3]
/usr/local/lib/glusterfs/3.3git/xlator/mgmt/glusterd.so(glusterd_op_commit_perform+0x7b)[0x7f31e4f7edcb]
/usr/local/lib/glusterfs/3.3git/xlator/mgmt/glusterd.so(+0x42015)[0x7f31e4f7d015]
/usr/local/lib/glusterfs/3.3git/xlator/mgmt/glusterd.so(glusterd_op_sm+0x246)[0x7f31e4f83434]
/usr/local/lib/glusterfs/3.3git/xlator/mgmt/glusterd.so(glusterd3_1_stage_op_cbk+0x497)[0x7f31e4f9b2e5]
/usr/local/lib/libgfrpc.so.0(rpc_clnt_handle_reply+0x211)[0x7f31e852fb94]
/usr/local/lib/libgfrpc.so.0(rpc_clnt_notify+0x2d3)[0x7f31e852ff31]
/usr/local/lib/libgfrpc.so.0(rpc_transport_notify+0x130)[0x7f31e852bf30]
/usr/local/lib/glusterfs/3.3git/rpc-transport/socket.so(socket_event_poll_in+0x54)[0x7f31e4ccb28c]
/usr/local/lib/glusterfs/3.3git/rpc-transport/socket.so(socket_event_handler+0x21d)[0x7f31e4ccb810]
/usr/local/lib/libglusterfs.so.0(+0x4de4c)[0x7f31e8787e4c]
/usr/local/lib/libglusterfs.so.0(+0x4e06f)[0x7f31e878806f]
/usr/local/lib/libglusterfs.so.0(event_dispatch+0x88)[0x7f31e87883fa]
/usr/local/sbin/glusterd(main+0x24d)[0x408486]
/lib64/libc.so.6(__libc_start_main+0xfd)[0x317e21ecdd]
/usr/local/sbin/glusterd[0x404379]
---------
[2012-05-26 00:56:25.919832] E [glusterd-utils.c:3417:glusterd_brick_start] 0-: Unable to start glusterfs, ret: -1
[2012-05-26 00:56:25.919903] E [glusterd-op-sm.c:2324:glusterd_op_ac_send_commit_op] 0-management: Commit failed
[2012-05-26 00:56:25.919940] I [glusterd-op-sm.c:2254:glusterd_op_modify_op_ctx] 0-management: op_ctx modification not required
[2012-05-26 00:56:25.920293] I [glusterd-rpc-ops.c:606:glusterd3_1_cluster_unlock_cbk] 0-glusterd: Received ACC from uuid: 0743f6bd-a928-4069-9fca-7625b2e090d3
[2012-05-26 00:56:25.920338] I [glusterd-rpc-ops.c:606:glusterd3_1_cluster_unlock_cbk] 0-glusterd: Received ACC from uuid: e5472700-975c-44d5-bd13-1812d0a28e4f
[2012-05-26 00:56:25.920372] I [glusterd-rpc-ops.c:606:glusterd3_1_cluster_unlock_cbk] 0-glusterd: Received ACC from uuid: b06d6d38-8164-49dc-8365-0aad7e728ccf
[2012-05-26 00:56:25.920403] I [glusterd-op-sm.c:2627:glusterd_op_txn_complete] 0-glusterd: Cleared local lock


strace log for the same:

trace log:

stat("/etc/localtime", {st_mode=S_IFREG|0644, st_size=3519, ...}) = 0
write(4, "[2012-05-25 11:37:32.444482] I [glusterd-rpc-ops.c:880:glusterd3_1_stage_op_cbk] 0-glusterd: Received ACC from uuid: 0743f6bd-a928-4069-9fca-7625b2e090d3\n", 154) = 154
readv(18, [{"\200\0\0p", 4}], 1)        = 4
readv(18, [{"\0\0\0\17\0\0\0\1", 8}], 1) = 8
readv(18, [{"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\201\0\0\0\272\0\0\0\311\0\0\0\254\0\0\0D\0\0\0I\0\0\0G\0\0\0,\0\0\0\262\0\0\0?\0\0\0|\0\0\0\263\0\0\0\333\0\0\0\236\0\0\0j\0\0\0\363\0\0\0\5\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\4\0\0\0\0", 104}], 1) = 104
stat("/etc/localtime", {st_mode=S_IFREG|0644, st_size=3519, ...}) = 0
write(4, "[2012-05-25 11:37:32.444793] I [glusterd-rpc-ops.c:880:glusterd3_1_stage_op_cbk] 0-glusterd: Received ACC from uuid: 81bac9ac-4449-472c-b23f-7cb3db9e6af3\n", 154) = 154
open("/etc/glusterd/hooks/1/start/pre", O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC) = 21
getdents(21, /* 2 entries */, 32768)    = 48
getdents(21, /* 0 entries */, 32768)    = 0
close(21)                               = 0
mkdir("/etc/glusterd/vols/rdma/run", 0777) = -1 EEXIST (File exists)
open("/etc/glusterd/vols/rdma/run/10.16.157.97-home-s0.pid", O_RDWR) = -1 ENOENT (No such file or directory)
unlink("/etc/glusterd/vols/rdma/run/10.16.157.97-home-s0.pid") = -1 ENOENT (No such file or directory)
stat("/etc/localtime", {st_mode=S_IFREG|0644, st_size=3519, ...}) = 0
write(4, "[2012-05-25 11:37:32.445419] I [glusterd-utils.c:1190:glusterd_volume_start_glusterfs] 0-: About to start glusterfs for brick 10.16.157.97:/home/s0\n", 148) = 148
socket(PF_INET, SOCK_STREAM, IPPROTO_IP) = 21
bind(21, {sa_family=AF_INET, sin_port=htons(24012), sin_addr=inet_addr("0.0.0.0")}, 16) = 0
close(21)                               = 0
pipe([21, 22])                          = 0
fcntl(22, F_SETFD, FD_CLOEXEC)          = 0
clone(child_stack=0, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7f29bc6509d0) = 25505
close(4294967295)                       = -1 EBADF (Bad file descriptor)
close(4294967295)                       = -1 EBADF (Bad file descriptor)
close(4294967295)                       = -1 EBADF (Bad file descriptor)
close(22)                               = 0
read(21, "", 4)                         = 0
close(21)                               = 0
wait4(25505, [{WIFSIGNALED(s) && WTERMSIG(s) == SIGSEGV && WCOREDUMP(s)}], 0, NULL) = 25505
close(4294967295)                       = -1 EBADF (Bad file descriptor)
close(4294967295)                       = -1 EBADF (Bad file descriptor)
close(4294967295)                       = -1 EBADF (Bad file descriptor)
stat("/etc/localtime", {st_mode=S_IFREG|0644, st_size=3519, ...}) = 0
write(4, "[2012-05-25 11:37:33.000754] E [glusterd-utils.c:3417:glusterd_brick_start] 0-: Unable to start glusterfs, ret: -1\n", 115) = 115
stat("/etc/localtime", {st_mode=S_IFREG|0644, st_size=3519, ...}) = 0
write(4, "[2012-05-25 11:37:33.000966] E [glusterd-op-sm.c:2324:glusterd_op_ac_send_commit_op] 0-management: Commit failed\n", 113) = 113
stat("/etc/localtime", {st_mode=S_IFREG|0644, st_size=3519, ...}) = 0
write(4, "[2012-05-25 11:37:33.001133] I [glusterd-op-sm.c:2254:glusterd_op_modify_op_ctx] 0-management: op_ctx modification not required\n", 128) = 128 


Expected results:


Additional info:
Comment 1 Raghavendra G 2012-06-01 07:33:46 EDT
The crash is observed only if the above-mentioned patches are applied. Those two patches increase the amount of memory locked by the glusterfs process for InfiniBand communication. Since those two patches have been abandoned, I am marking this bug as invalid.

regards,
Raghavendra.