Bug 809352 - glusterfs client crashed while running dbench
glusterfs client crashed while running dbench
Status: CLOSED NOTABUG
Product: GlusterFS
Classification: Community
Component: replicate (Show other bugs)
mainline
Unspecified Unspecified
unspecified Severity high
: ---
: ---
Assigned To: Pranith Kumar K
:
Depends On:
Blocks:
  Show dependency treegraph
 
Reported: 2012-04-03 03:34 EDT by Shwetha Panduranga
Modified: 2012-04-04 07:05 EDT (History)
1 user (show)

See Also:
Fixed In Version:
Doc Type: Bug Fix
Doc Text:
Story Points: ---
Clone Of:
Environment:
Last Closed: 2012-04-04 07:05:35 EDT
Type: Bug
Regression: ---
Mount Type: ---
Documentation: ---
CRM:
Verified Versions:
Category: ---
oVirt Team: ---
RHEL 7.3 requirements from Atomic Host:
Cloudforms Team: ---


Attachments (Terms of Use)
Client Log File (6.79 MB, text/x-log)
2012-04-03 03:36 EDT, Shwetha Panduranga
no flags Details

  None (edit)
Description Shwetha Panduranga 2012-04-03 03:34:50 EDT
Description of problem:
(gdb) bt full
#0  0x0000003f71e32885 in raise () from /lib64/libc.so.6
No symbol table info available.
#1  0x0000003f71e34065 in abort () from /lib64/libc.so.6
No symbol table info available.
#2  0x0000003f71e2b9fe in __assert_fail_base () from /lib64/libc.so.6
No symbol table info available.
#3  0x0000003f71e2bac0 in __assert_fail () from /lib64/libc.so.6
No symbol table info available.
#4  0x00007fa54dfe12c9 in client_lookup (frame=0x7fa551548448, this=0x5ede830, loc=0x7fa52082333c, xdata=0x144356c) at client.c:367
        ret = -1
        conf = 0x5f207b0
        proc = 0x0
        args = {loc = 0x0, fd = 0x0, linkname = 0x0, iobref = 0x0, vector = 0x0, xattr = 0x0, stbuf = 0x0, oldloc = 0x0, newloc = 0x0, name = 0x0, flock = 0x0, 
          volume = 0x0, basename = 0x0, offset = 0, mask = 0, cmd = 0, size = 0, mode = 0, rdev = 0, flags = 0, count = 0, datasync = 0, cmd_entrylk = ENTRYLK_LOCK, 
          type = ENTRYLK_RDLCK, optype = GF_XATTROP_ADD_ARRAY, valid = 0, len = 0, umask = 0, xdata = 0x0}
        op_errno = 107
        __PRETTY_FUNCTION__ = "client_lookup"
        __FUNCTION__ = "client_lookup"
#5  0x00007fa54dd8d5e0 in afr_sh_common_lookup (frame=0x7fa551355f9c, this=0x5ee18f0, loc=0x7fa52082333c, lookup_done=0x7fa54dd9b920 <afr_sh_entry_fix>, gfid=0x0, 
    flags=3, xdata=0x0) at afr-self-heal-common.c:1794
        _new = 0x7fa551548448
        old_THIS = 0x5ee18f0
        tmp_cbk = 0x7fa54dd8b8a3 <afr_sh_common_lookup_cbk>
        local = 0x7fa520823304
        i = 0
        call_count = 3
        priv = 0x5f690c0
        xattr_req = 0x144356c
        sh = 0x7fa5208258cc
        __FUNCTION__ = "afr_sh_common_lookup"
        __PRETTY_FUNCTION__ = "afr_sh_common_lookup"
#6  0x00007fa54dd9bdc2 in afr_sh_post_nonblocking_entry_cbk (frame=0x7fa551355f9c, this=0x5ee18f0) at afr-self-heal-entry.c:2380
        int_lock = 0x7fa5208233fc
        local = 0x7fa520823304
        sh = 0x7fa5208258cc
        __FUNCTION__ = "afr_sh_post_nonblocking_entry_cbk"
---Type <return> to continue, or q <return> to quit---
#7  0x00007fa54dda42bf in afr_nonblocking_entrylk_cbk (frame=0x7fa551355f9c, cookie=0x1, this=0x5ee18f0, op_ret=0, op_errno=0, xdata=0x0) at afr-lk-common.c:1215
        int_lock = 0x7fa5208233fc
        local = 0x7fa520823304
        call_count = 0
        child_index = 1
        __FUNCTION__ = "afr_nonblocking_entrylk_cbk"
#8  0x00007fa54dff626f in client3_1_entrylk_cbk (req=0x6f9697c, iov=0x6f969bc, count=1, myframe=0x7fa55155278c) at client3_1-fops.c:1592
        fn = 0x7fa54dda4005 <afr_nonblocking_entrylk_cbk>
        _parent = 0x7fa551355f9c
        old_THIS = 0x5edf1f0
        __local = 0x0
        frame = 0x7fa55155278c
        rsp = {op_ret = 0, op_errno = 0, xdata = {xdata_len = 0, xdata_val = 0x0}}
        ret = 12
        this = 0x5edf1f0
        xdata = 0x0
        __FUNCTION__ = "client3_1_entrylk_cbk"
#9  0x00007fa5524ec9fc in rpc_clnt_handle_reply (clnt=0x5f0c980, pollin=0x5e81860) at rpc-clnt.c:797
        conn = 0x5f0c9b0
        saved_frame = 0x6bf4d4c
        ret = 0
        req = 0x6f9697c
        xid = 22161
        __FUNCTION__ = "rpc_clnt_handle_reply"
#10 0x00007fa5524ecd99 in rpc_clnt_notify (trans=0x6beb210, mydata=0x5f0c9b0, event=RPC_TRANSPORT_MSG_RECEIVED, data=0x5e81860) at rpc-clnt.c:916
        conn = 0x5f0c9b0
        clnt = 0x5f0c980
        ret = -1
        req_info = 0x0
        pollin = 0x5e81860
        tv = {tv_sec = 0, tv_usec = 0}
#11 0x00007fa5524e8e7c in rpc_transport_notify (this=0x6beb210, event=RPC_TRANSPORT_MSG_RECEIVED, data=0x5e81860) at rpc-transport.c:498
        ret = -1
        __FUNCTION__ = "rpc_transport_notify"
#12 0x00007fa54ee43270 in socket_event_poll_in (this=0x6beb210) at socket.c:1686
        ret = 0
---Type <return> to continue, or q <return> to quit---
        pollin = 0x5e81860
#13 0x00007fa54ee437f4 in socket_event_handler (fd=24, idx=2, data=0x6beb210, poll_in=1, poll_out=0, poll_err=0) at socket.c:1801
        this = 0x6beb210
        priv = 0x5f46f00
        ret = 0
        __FUNCTION__ = "socket_event_handler"
#14 0x00007fa552744628 in event_dispatch_epoll_handler (event_pool=0x14353a0, events=0x1458420, i=4) at event.c:794
        event_data = 0x1458454
        handler = 0x7fa54ee435d7 <socket_event_handler>
        data = 0x6beb210
        idx = 2
        ret = -1
        __FUNCTION__ = "event_dispatch_epoll_handler"
#15 0x00007fa55274484b in event_dispatch_epoll (event_pool=0x14353a0) at event.c:856
        events = 0x1458420
        size = 5
        i = 4
        ret = 0
        __FUNCTION__ = "event_dispatch_epoll"
#16 0x00007fa552744bd6 in event_dispatch (event_pool=0x14353a0) at event.c:956
        ret = -1
        __FUNCTION__ = "event_dispatch"
#17 0x0000000000408057 in main (argc=4, argv=0x7fff2a82cbd8) at glusterfsd.c:1650
        ctx = 0x141d010
        ret = 0
        __FUNCTION__ = "main"
(gdb) Killed


Version-Release number of selected component (if applicable):
mainline

script1.sh (to run on storage node for graph changes):-
-----------------------------------------------------
#!/bin/bash

# Repeatedly toggle stat-prefetch and quota on volume "dstore" to force
# client-side graph changes, pausing 30 seconds after every CLI call.
pause() {
	sleep 30
}

iteration=0
while [ "$iteration" -lt 10 ]; do
	iteration=$((iteration + 1))
	gluster volume set dstore stat-prefetch off
	pause
	gluster volume quota dstore enable
	pause
	gluster volume set dstore stat-prefetch on
	pause
	gluster volume quota dstore disable --mode=script
	pause
done

Steps to Reproduce:
1.create a distribute-replicate volume(3X3). start the volume
2.create 2 fuse mounts on different clients
3.run "dbench -s -F -S --stat-check 10" on one fuse mount
4.run "find . | xargs stat" in a loop on other fuse mount
5.on one of the storage node run the "script1"
6. While dbench is in progress, bring down bricks from each replica set.
7. Bring the bricks back online.

Actual results:
dbench failed with "Transport End Point Not Connected". 

Additional info:
[2012-04-03 17:45:50.981366] I [afr-self-heal-common.c:2045:afr_self_heal_completion_cbk] 8-dstore-replicate-0: background  entry self-heal completed on /clients/client6/~dmtmp/PM
pending frames:
frame : type(1) op(LOOKUP)
frame : type(1) op(LOOKUP)
frame : type(1) op(OPENDIR)
frame : type(1) op(OPENDIR)
frame : type(1) op(LOOKUP)
frame : type(1) op(LOOKUP)
frame : type(1) op(LOOKUP)
frame : type(1) op(LOOKUP)

patchset: git://git.gluster.com/glusterfs.git
signal received: 6
time of crash: 2012-04-03 17:45:50
configuration details:
argp 1
backtrace 1
dlfcn 1
fdatasync 1
libpthread 1
Expected results:

llistxattr 1
setfsid 1
spinlock 1
epoll.h 1
xattr.h 1
st_atim.tv_nsec 1
package-string: glusterfs 3git
/lib64/libc.so.6[0x3f71e32900]
/lib64/libc.so.6(gsignal+0x35)[0x3f71e32885]

/lib64/libc.so.6(abort+0x175)[0x3f71e34065]
/lib64/libc.so.6[0x3f71e2b9fe]
/lib64/libc.so.6(__assert_perror_fail+0x0)[0x3f71e2bac0]
/usr/local/lib/glusterfs/3git/xlator/protocol/client.so(client_lookup+0xe6)[0x7fa54dfe12c9]
/usr/local/lib/glusterfs/3git/xlator/cluster/replicate.so(afr_sh_common_lookup+0x4f4)[0x7fa54dd8d5e0]
/usr/local/lib/glusterfs/3git/xlator/cluster/replicate.so(afr_sh_post_nonblocking_entry_cbk+0x17d)[0x7fa54dd9bdc2]
/usr/local/lib/glusterfs/3git/xlator/cluster/replicate.so(+0x592bf)[0x7fa54dda42bf]
/usr/local/lib/glusterfs/3git/xlator/protocol/client.so(client3_1_entrylk_cbk+0x4ae)[0x7fa54dff626f]
/usr/local/lib/libgfrpc.so.0(rpc_clnt_handle_reply+0x211)[0x7fa5524ec9fc]
/usr/local/lib/libgfrpc.so.0(rpc_clnt_notify+0x2d3)[0x7fa5524ecd99]
/usr/local/lib/libgfrpc.so.0(rpc_transport_notify+0x130)[0x7fa5524e8e7c]
/usr/local/lib/glusterfs/3git/rpc-transport/socket.so(socket_event_poll_in+0x54)[0x7fa54ee43270]
/usr/local/lib/glusterfs/3git/rpc-transport/socket.so(socket_event_handler+0x21d)[0x7fa54ee437f4]
/usr/local/lib/libglusterfs.so.0(+0x4e628)[0x7fa552744628]
/usr/local/lib/libglusterfs.so.0(+0x4e84b)[0x7fa55274484b]
/usr/local/lib/libglusterfs.so.0(event_dispatch+0x88)[0x7fa552744bd6]
/usr/local/sbin/glusterfs(main+0x238)[0x408057]
/lib64/libc.so.6(__libc_start_main+0xfd)[0x3f71e1ecdd]
/usr/local/sbin/glusterfs[0x4040c9]
Comment 1 Shwetha Panduranga 2012-04-03 03:36:50 EDT
Created attachment 574757 [details]
Client Log File

Note You need to log in before you can comment on or make changes to this bug.