Bug 825419 - Starting gluster volumes crashes on rdma
Starting gluster volumes crashes on rdma
Status: CLOSED NOTABUG
Product: GlusterFS
Classification: Community
Component: rdma (Show other bugs)
3.3.0
Unspecified Unspecified
unspecified Severity high
: ---
: ---
Assigned To: Raghavendra G
:
Depends On:
Blocks:
  Show dependency tree / graph
 
Reported: 2012-05-26 01:04 EDT by Anush Shetty
Modified: 2012-06-01 07:33 EDT (History)
1 user (show)

See Also:
Fixed In Version:
Doc Type: Bug Fix
Doc Text:
Story Points: ---
Clone Of:
Environment:
Last Closed: 2012-06-01 07:33:46 EDT
Type: Bug
Regression: ---
Mount Type: ---
Documentation: ---
CRM:
Verified Versions:
Category: ---
oVirt Team: ---
RHEL 7.3 requirements from Atomic Host:


Attachments (Terms of Use)

  None (edit)
Description Anush Shetty 2012-05-26 01:04:01 EDT
Description of problem: After creating volumes, starting a gluster volume fails on rdma. The child process that glusterd forks to start glusterfsd crashes (it is killed by SIGSEGV; see the strace log below).



Version-Release number of selected component (if applicable): release-3.3 with http://review.gluster.com/#change,3447 and http://review.gluster.com/#change,3448


How reproducible: Consistently


Steps to Reproduce:
1. gluster volume start volume-name
2.
3.
  
Actual results:

[2012-05-26 00:56:25.919832] E [glusterd-utils.c:3417:glusterd_brick_start] 0-: Unable to start glusterfs, ret: -1
[2012-05-26 00:56:25.919903] E [glusterd-op-sm.c:2324:glusterd_op_ac_send_commit_op] 0-management: Commit failed
[2012-05-26 00:56:25.919940] I [glusterd-op-sm.c:2254:glusterd_op_modify_op_ctx] 0-management: op_ctx modification not required
[2012-05-26 00:56:25.920293] I [glusterd-rpc-ops.c:606:glusterd3_1_cluster_unlock_cbk] 0-glusterd: Received ACC from uuid: 0743f6bd-a928-4069-9fca-7625b2e090d3
[2012-05-26 00:56:25.920338] I [glusterd-rpc-ops.c:606:glusterd3_1_cluster_unlock_cbk] 0-glusterd: Received ACC from uuid: e5472700-975c-44d5-bd13-1812d0a28e4f
[2012-05-26 00:56:25.920372] I [glusterd-rpc-ops.c:606:glusterd3_1_cluster_unlock_cbk] 0-glusterd: Received ACC from uuid: b06d6d38-8164-49dc-8365-0aad7e728ccf
[2012-05-26 00:56:25.920403] I [glusterd-op-sm.c:2627:glusterd_op_txn_complete] 0-glusterd: Cleared local lock
...skipping...
setfsid 1
spinlock 1
epoll.h 1
xattr.h 1
st_atim.tv_nsec 1
package-string: glusterfs 3.3git
/lib64/libc.so.6[0x317e232900]
/lib64/libc.so.6(__libc_fork+0x118)[0x317e2aace8]
/usr/local/lib/libglusterfs.so.0(runner_start+0x1e9)[0x7f31e87a2309]
/usr/local/lib/libglusterfs.so.0(+0x68894)[0x7f31e87a2894]
/usr/local/lib/libglusterfs.so.0(runner_run+0x22)[0x7f31e87a28e0]
/usr/local/lib/glusterfs/3.3git/xlator/mgmt/glusterd.so(glusterd_volume_start_glusterfs+0xb1c)[0x7f31e4f880c6]
/usr/local/lib/glusterfs/3.3git/xlator/mgmt/glusterd.so(glusterd_brick_start+0x1df)[0x7f31e4f90057]
/usr/local/lib/glusterfs/3.3git/xlator/mgmt/glusterd.so(glusterd_op_start_volume+0xa4)[0x7f31e4fcefe3]
/usr/local/lib/glusterfs/3.3git/xlator/mgmt/glusterd.so(glusterd_op_commit_perform+0x7b)[0x7f31e4f7edcb]
/usr/local/lib/glusterfs/3.3git/xlator/mgmt/glusterd.so(+0x42015)[0x7f31e4f7d015]
/usr/local/lib/glusterfs/3.3git/xlator/mgmt/glusterd.so(glusterd_op_sm+0x246)[0x7f31e4f83434]
/usr/local/lib/glusterfs/3.3git/xlator/mgmt/glusterd.so(glusterd3_1_stage_op_cbk+0x497)[0x7f31e4f9b2e5]
/usr/local/lib/libgfrpc.so.0(rpc_clnt_handle_reply+0x211)[0x7f31e852fb94]
/usr/local/lib/libgfrpc.so.0(rpc_clnt_notify+0x2d3)[0x7f31e852ff31]
/usr/local/lib/libgfrpc.so.0(rpc_transport_notify+0x130)[0x7f31e852bf30]
/usr/local/lib/glusterfs/3.3git/rpc-transport/socket.so(socket_event_poll_in+0x54)[0x7f31e4ccb28c]
/usr/local/lib/glusterfs/3.3git/rpc-transport/socket.so(socket_event_handler+0x21d)[0x7f31e4ccb810]
/usr/local/lib/libglusterfs.so.0(+0x4de4c)[0x7f31e8787e4c]
/usr/local/lib/libglusterfs.so.0(+0x4e06f)[0x7f31e878806f]
/usr/local/lib/libglusterfs.so.0(event_dispatch+0x88)[0x7f31e87883fa]
/usr/local/sbin/glusterd(main+0x24d)[0x408486]
/lib64/libc.so.6(__libc_start_main+0xfd)[0x317e21ecdd]
/usr/local/sbin/glusterd[0x404379]
---------
[2012-05-26 00:56:25.919832] E [glusterd-utils.c:3417:glusterd_brick_start] 0-: Unable to start glusterfs, ret: -1
[2012-05-26 00:56:25.919903] E [glusterd-op-sm.c:2324:glusterd_op_ac_send_commit_op] 0-management: Commit failed
[2012-05-26 00:56:25.919940] I [glusterd-op-sm.c:2254:glusterd_op_modify_op_ctx] 0-management: op_ctx modification not required
[2012-05-26 00:56:25.920293] I [glusterd-rpc-ops.c:606:glusterd3_1_cluster_unlock_cbk] 0-glusterd: Received ACC from uuid: 0743f6bd-a928-4069-9fca-7625b2e090d3
[2012-05-26 00:56:25.920338] I [glusterd-rpc-ops.c:606:glusterd3_1_cluster_unlock_cbk] 0-glusterd: Received ACC from uuid: e5472700-975c-44d5-bd13-1812d0a28e4f
[2012-05-26 00:56:25.920372] I [glusterd-rpc-ops.c:606:glusterd3_1_cluster_unlock_cbk] 0-glusterd: Received ACC from uuid: b06d6d38-8164-49dc-8365-0aad7e728ccf
[2012-05-26 00:56:25.920403] I [glusterd-op-sm.c:2627:glusterd_op_txn_complete] 0-glusterd: Cleared local lock


strace log for the same:

trace log:

stat("/etc/localtime", {st_mode=S_IFREG|0644, st_size=3519, ...}) = 0
write(4, "[2012-05-25 11:37:32.444482] I [glusterd-rpc-ops.c:880:glusterd3_1_stage_op_cbk] 0-glusterd: Received ACC from uuid: 0743f6bd-a928-4069-9fca-7625b2e090d3\n", 154) = 154
readv(18, [{"\200\0\0p", 4}], 1)        = 4
readv(18, [{"\0\0\0\17\0\0\0\1", 8}], 1) = 8
readv(18, [{"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\201\0\0\0\272\0\0\0\311\0\0\0\254\0\0\0D\0\0\0I\0\0\0G\0\0\0,\0\0\0\262\0\0\0?\0\0\0|\0\0\0\263\0\0\0\333\0\0\0\236\0\0\0j\0\0\0\363\0\0\0\5\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\4\0\0\0\0", 104}], 1) = 104
stat("/etc/localtime", {st_mode=S_IFREG|0644, st_size=3519, ...}) = 0
write(4, "[2012-05-25 11:37:32.444793] I [glusterd-rpc-ops.c:880:glusterd3_1_stage_op_cbk] 0-glusterd: Received ACC from uuid: 81bac9ac-4449-472c-b23f-7cb3db9e6af3\n", 154) = 154
open("/etc/glusterd/hooks/1/start/pre", O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC) = 21
getdents(21, /* 2 entries */, 32768)    = 48
getdents(21, /* 0 entries */, 32768)    = 0
close(21)                               = 0
mkdir("/etc/glusterd/vols/rdma/run", 0777) = -1 EEXIST (File exists)
open("/etc/glusterd/vols/rdma/run/10.16.157.97-home-s0.pid", O_RDWR) = -1 ENOENT (No such file or directory)
unlink("/etc/glusterd/vols/rdma/run/10.16.157.97-home-s0.pid") = -1 ENOENT (No such file or directory)
stat("/etc/localtime", {st_mode=S_IFREG|0644, st_size=3519, ...}) = 0
write(4, "[2012-05-25 11:37:32.445419] I [glusterd-utils.c:1190:glusterd_volume_start_glusterfs] 0-: About to start glusterfs for brick 10.16.157.97:/home/s0\n", 148) = 148
socket(PF_INET, SOCK_STREAM, IPPROTO_IP) = 21
bind(21, {sa_family=AF_INET, sin_port=htons(24012), sin_addr=inet_addr("0.0.0.0")}, 16) = 0
close(21)                               = 0
pipe([21, 22])                          = 0
fcntl(22, F_SETFD, FD_CLOEXEC)          = 0
clone(child_stack=0, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7f29bc6509d0) = 25505
close(4294967295)                       = -1 EBADF (Bad file descriptor)
close(4294967295)                       = -1 EBADF (Bad file descriptor)
close(4294967295)                       = -1 EBADF (Bad file descriptor)
close(22)                               = 0
read(21, "", 4)                         = 0
close(21)                               = 0
wait4(25505, [{WIFSIGNALED(s) && WTERMSIG(s) == SIGSEGV && WCOREDUMP(s)}], 0, NULL) = 25505
close(4294967295)                       = -1 EBADF (Bad file descriptor)
close(4294967295)                       = -1 EBADF (Bad file descriptor)
close(4294967295)                       = -1 EBADF (Bad file descriptor)
stat("/etc/localtime", {st_mode=S_IFREG|0644, st_size=3519, ...}) = 0
write(4, "[2012-05-25 11:37:33.000754] E [glusterd-utils.c:3417:glusterd_brick_start] 0-: Unable to start glusterfs, ret: -1\n", 115) = 115
stat("/etc/localtime", {st_mode=S_IFREG|0644, st_size=3519, ...}) = 0
write(4, "[2012-05-25 11:37:33.000966] E [glusterd-op-sm.c:2324:glusterd_op_ac_send_commit_op] 0-management: Commit failed\n", 113) = 113
stat("/etc/localtime", {st_mode=S_IFREG|0644, st_size=3519, ...}) = 0
write(4, "[2012-05-25 11:37:33.001133] I [glusterd-op-sm.c:2254:glusterd_op_modify_op_ctx] 0-management: op_ctx modification not required\n", 128) = 128 


Expected results:


Additional info:
Comment 1 Raghavendra G 2012-06-01 07:33:46 EDT
The crash is observed only when the above-mentioned patches are applied. Those two patches increase the amount of memory locked by the glusterfs process for InfiniBand communication. Since those two patches have been abandoned, I am marking this bug as invalid.

regards,
Raghavendra.

Note You need to log in before you can comment on or make changes to this bug.