Bug 1702185 - coredump reported by test ./tests/bugs/glusterd/bug-1699339.t
Summary: coredump reported by test ./tests/bugs/glusterd/bug-1699339.t
Keywords:
Status: CLOSED NEXTRELEASE
Alias: None
Product: GlusterFS
Classification: Community
Component: tests
Version: mainline
Hardware: Unspecified
OS: Unspecified
Priority: unspecified
Severity: unspecified
Target Milestone: ---
Assignee: bugs@gluster.org
QA Contact:
URL:
Whiteboard:
Depends On:
Blocks: 1702240
 
Reported: 2019-04-23 07:42 UTC by Mohammed Rafi KC
Modified: 2019-04-29 05:52 UTC
CC List: 2 users

Fixed In Version:
Doc Type: If docs needed, set a value
Doc Text:
Clone Of:
Cloned To: 1702240
Environment:
Last Closed: 2019-04-24 00:11:39 UTC
Regression: ---
Mount Type: ---
Documentation: ---
CRM:
Verified Versions:
Embargoed:



Links
System: Gluster.org Gerrit
ID: 22606
Status: Merged
Summary: glusterd/shd: Keep a ref on volinfo until attach rpc execute cbk
Last Updated: 2019-04-24 00:11:38 UTC

Description Mohammed Rafi KC 2019-04-23 07:42:21 UTC
Description of problem:

Upstream test ./tests/bugs/glusterd/bug-1699339.t failed regression with a coredump. The backtrace of the core can be found via https://build.gluster.org/job/regression-test-with-multiplex/1270/display/redirect?page=changes

Version-Release number of selected component (if applicable):


How reproducible:


Steps to Reproduce:
1.
2.
3.

Actual results:


Expected results:


Additional info:

Comment 1 Worker Ant 2019-04-23 07:44:39 UTC
REVIEW: https://review.gluster.org/22606 (glusterd/shd: Keep a ref on volinfo until attach rpc execute cbk) posted (#1) for review on master by mohammed rafi kc

Comment 2 Worker Ant 2019-04-24 00:11:39 UTC
REVIEW: https://review.gluster.org/22606 (glusterd/shd: Keep a ref on volinfo until attach rpc execute cbk) merged (#2) on master by Atin Mukherjee

Comment 3 Nithya Balachandran 2019-04-24 02:56:58 UTC
(In reply to Mohammed Rafi KC from comment #0)
> Description of problem:
> 
> Upstream test ./tests/bugs/glusterd/bug-1699339.t failed the regression with
> a coredump. backtrace of the core can be tracked from
> https://build.gluster.org/job/regression-test-with-multiplex/1270/display/
> redirect?page=changes

This will not always be available. Please make it a point to put the backtrace in a comment in the BZ for all crashes.

Comment 4 Mohammed Rafi KC 2019-04-29 05:52:52 UTC
Backtrace:

Thread 1 (Thread 0x7feb839dd700 (LWP 1191)):
#0  0x00007feb9107fef9 in vfprintf () from /lib64/libc.so.6
No symbol table info available.
#1  0x00007feb910aac33 in vasprintf () from /lib64/libc.so.6
No symbol table info available.
#2  0x00007feb92a444b1 in _gf_msg (domain=0x2198c20 "management", file=0x7feb86c96298 "/home/jenkins/root/workspace/regression-test-with-multiplex/xlators/mgmt/glusterd/src/glusterd-svc-helper.c", function=0x7feb86c968e0 <__FUNCTION__.31158> "glusterd_svc_attach_cbk", line=684, level=GF_LOG_INFO, errnum=0, trace=0, msgid=106617, fmt=0x7feb86c964f8 "svc %s of volume %s attached successfully to pid %d") at /home/jenkins/root/workspace/regression-test-with-multiplex/libglusterfs/src/logging.c:2113
        ret = 0
        msgstr = 0x0
        ap = {{gp_offset = 48, fp_offset = 48, overflow_arg_area = 0x7feb839dc908, reg_save_area = 0x7feb839dc820}}
        this = 0x2197b90
        ctx = 0x214d010
        callstr = '\000' <repeats 4095 times>
        passcallstr = 0
        log_inited = 1
        __PRETTY_FUNCTION__ = "_gf_msg"
#3  0x00007feb86c4086b in glusterd_svc_attach_cbk (req=0x7feb6c02be88, iov=0x7feb6c02bec0, count=1, v_frame=0x7feb6c01a5c8) at /home/jenkins/root/workspace/regression-test-with-multiplex/xlators/mgmt/glusterd/src/glusterd-svc-helper.c:682
        frame = 0x7feb6c01a5c8
        volinfo = 0x0
        shd = 0x0
        svc = 0x2244fd0
        parent_svc = 0x0
        mux_proc = 0x0
        conf = 0x21e6290
        flag = 0x7feb6c01f0d0
        this = 0x2197b90
        pid = -1
        ret = 16
        rsp = {op_ret = 0, op_errno = 0, spec = 0x7feb74099370 "", xdata = {xdata_len = 0, xdata_val = 0x0}}
        __FUNCTION__ = "glusterd_svc_attach_cbk"
#4  0x00007feb927e154b in rpc_clnt_handle_reply (clnt=0x7feb6c005710, pollin=0x7feb74083af0) at /home/jenkins/root/workspace/regression-test-with-multiplex/rpc/rpc-lib/src/rpc-clnt.c:764
        conn = 0x7feb6c005740
        saved_frame = 0x7feb6c035c38
        ret = 0
        req = 0x7feb6c02be88
        xid = 30
        __FUNCTION__ = "rpc_clnt_handle_reply"
#5  0x00007feb927e1a74 in rpc_clnt_notify (trans=0x7feb6c040db0, mydata=0x7feb6c005740, event=RPC_TRANSPORT_MSG_RECEIVED, data=0x7feb74083af0) at /home/jenkins/root/workspace/regression-test-with-multiplex/rpc/rpc-lib/src/rpc-clnt.c:931
        conn = 0x7feb6c005740
        clnt = 0x7feb6c005710
        ret = -1
        req_info = 0x0
        pollin = 0x7feb74083af0
        clnt_mydata = 0x0
        old_THIS = 0x2197b90
        __FUNCTION__ = "rpc_clnt_notify"
#6  0x00007feb927dda5b in rpc_transport_notify (this=0x7feb6c040db0, event=RPC_TRANSPORT_MSG_RECEIVED, data=0x7feb74083af0) at /home/jenkins/root/workspace/regression-test-with-multiplex/rpc/rpc-lib/src/rpc-transport.c:549
        ret = -1
        __FUNCTION__ = "rpc_transport_notify"
#7  0x00007feb85d30c79 in socket_event_poll_in_async (xl=0x2197b90, async=0x7feb74083c18) at /home/jenkins/root/workspace/regression-test-with-multiplex/rpc/rpc-transport/socket/src/socket.c:2569
        pollin = 0x7feb74083af0
        this = 0x7feb6c040db0
        priv = 0x7feb6c040a30
#8  0x00007feb85d2844c in gf_async (async=0x7feb74083c18, xl=0x2197b90, cbk=0x7feb85d30c22 <socket_event_poll_in_async>) at /home/jenkins/root/workspace/regression-test-with-multiplex/libglusterfs/src/glusterfs/async.h:189
        __FUNCTION__ = "gf_async"
#9  0x00007feb85d30e07 in socket_event_poll_in (this=0x7feb6c040db0, notify_handled=true) at /home/jenkins/root/workspace/regression-test-with-multiplex/rpc/rpc-transport/socket/src/socket.c:2610
        ret = 0
        pollin = 0x7feb74083af0
        priv = 0x7feb6c040a30
        ctx = 0x214d010
#10 0x00007feb85d31db0 in socket_event_handler (fd=69, idx=31, gen=4, data=0x7feb6c040db0, poll_in=1, poll_out=0, poll_err=0, event_thread_died=0 '\000') at /home/jenkins/root/workspace/regression-test-with-multiplex/rpc/rpc-transport/socket/src/socket.c:3001
        this = 0x7feb6c040db0
        priv = 0x7feb6c040a30
        ret = 0
        ctx = 0x214d010
        socket_closed = false
        notify_handled = false
        __FUNCTION__ = "socket_event_handler"
#11 0x00007feb92abeca4 in event_dispatch_epoll_handler (event_pool=0x2183e90, event=0x7feb839dce80) at /home/jenkins/root/workspace/regression-test-with-multiplex/libglusterfs/src/event-epoll.c:648
        ev_data = 0x7feb839dce84
        slot = 0x21c79d0
        handler = 0x7feb85d3190b <socket_event_handler>
        data = 0x7feb6c040db0
        idx = 31
        gen = 4
        ret = 0
        fd = 69
        handled_error_previously = false
        __FUNCTION__ = "event_dispatch_epoll_handler"
#12 0x00007feb92abf1bd in event_dispatch_epoll_worker (data=0x2203eb0) at /home/jenkins/root/workspace/regression-test-with-multiplex/libglusterfs/src/event-epoll.c:761
        event = {events = 1, data = {ptr = 0x40000001f, fd = 31, u32 = 31, u64 = 17179869215}}
        ret = 1
        ev_data = 0x2203eb0
        event_pool = 0x2183e90
        myindex = 1
        timetodie = 0
        gen = 0
        poller_death_notify = {next = 0x0, prev = 0x0}
        slot = 0x0
        tmp = 0x0
        __FUNCTION__ = "event_dispatch_epoll_worker"
#13 0x00007feb91869dd5 in start_thread () from /lib64/libpthread.so.0
No symbol table info available.
#14 0x00007feb91130ead in clone () from /lib64/libc.so.6

RCA:

During glusterd restart, glusterd starts a new shd daemon if one is not already running and attaches all subsequent shd graphs to that existing daemon. While glusterd waits for an attach request to be processed, the volinfo can be freed in the meantime by a thread handling the handshake, or even by an epoll thread that stops and deletes the volume; the backtrace above shows the crash in glusterd_svc_attach_cbk while it logs the result of such an attach. We therefore have to keep a ref on the volinfo when we send an attach request and release it only once the callback has run.
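
The fix merged via https://review.gluster.org/22606 follows the usual take-a-ref-before-async-work pattern. Below is a minimal, self-contained C sketch of that pattern, not the actual glusterd code: volinfo_t, volinfo_ref(), volinfo_unref(), attach_cbk() and send_attach_request() are hypothetical stand-ins (the real code uses glusterd_volinfo_t and the RPC framework; the pthread here only simulates the asynchronous RPC callback).

/* Hypothetical, simplified sketch -- not the actual glusterd code. */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

typedef struct volinfo {
    char volname[64];
    int refcount;               /* protected by lock */
    pthread_mutex_t lock;
} volinfo_t;

static volinfo_t *
volinfo_ref(volinfo_t *v)
{
    pthread_mutex_lock(&v->lock);
    v->refcount++;
    pthread_mutex_unlock(&v->lock);
    return v;
}

static void
volinfo_unref(volinfo_t *v)
{
    int refs;

    pthread_mutex_lock(&v->lock);
    refs = --v->refcount;
    pthread_mutex_unlock(&v->lock);
    if (refs == 0) {
        printf("freeing volinfo %s\n", v->volname);
        pthread_mutex_destroy(&v->lock);
        free(v);
    }
}

/* Stand-in for glusterd_svc_attach_cbk: runs later, on another thread.
 * Because the request path took a ref, volinfo is still alive here. */
static void *
attach_cbk(void *arg)
{
    volinfo_t *v = arg;

    printf("attach cbk: volume %s is still valid\n", v->volname);
    volinfo_unref(v);           /* drop the ref taken at request time */
    return NULL;
}

static void
send_attach_request(volinfo_t *v, pthread_t *tid)
{
    /* The fix: take a ref before handing volinfo to the async path so a
     * concurrent volume delete cannot free it while the RPC is in flight. */
    volinfo_ref(v);
    pthread_create(tid, NULL, attach_cbk, v);
}

int
main(void)
{
    pthread_t tid;
    volinfo_t *v = calloc(1, sizeof(*v));

    pthread_mutex_init(&v->lock, NULL);
    snprintf(v->volname, sizeof(v->volname), "test-vol");
    v->refcount = 1;            /* the owner's ref */

    send_attach_request(v, &tid);

    /* Simulate the race: the owner drops its ref (volume delete) while the
     * attach request is still in flight. Without the ref taken above, the
     * callback would be left with a dangling pointer. */
    volinfo_unref(v);

    pthread_join(tid, NULL);
    return 0;
}

Build with "gcc -pthread sketch.c"; whichever of the two unrefs happens last frees the object, so the callback can never observe a dangling volinfo.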

