| Summary: | crashes in callbacks of fops like lookup, readdirp in protocol/client | ||||||||
|---|---|---|---|---|---|---|---|---|---|
| Product: | [Community] GlusterFS | Reporter: | Raghavendra G <raghavendra> | ||||||
| Component: | protocol | Assignee: | Raghavendra G <raghavendra> | ||||||
| Status: | CLOSED WORKSFORME | QA Contact: | |||||||
| Severity: | low | Docs Contact: | |||||||
| Priority: | low | ||||||||
| Version: | mainline | CC: | gluster-bugs, shehjart | ||||||
| Target Milestone: | --- | ||||||||
| Target Release: | --- | ||||||||
| Hardware: | All | ||||||||
| OS: | Linux | ||||||||
| Whiteboard: | |||||||||
| Fixed In Version: | Doc Type: | Bug Fix | |||||||
| Doc Text: | Story Points: | --- | |||||||
| Clone Of: | Environment: | ||||||||
| Last Closed: | Type: | --- | |||||||
| Regression: | --- | Mount Type: | --- | ||||||
| Documentation: | --- | CRM: | |||||||
| Verified Versions: | Category: | --- | |||||||
| oVirt Team: | --- | RHEL 7.3 requirements from Atomic Host: | |||||||
| Cloudforms Team: | --- | Target Upstream Version: | |||||||
| Attachments: |
|
||||||||
I observed at least two crashes in protocol/client, in client_readdirp_cbk and client_lookup_cbk. Both crashes were due to the frame passed as an argument having already been freed.
My guess is that, in either replicate or distribute, STACK_DESTROY was called on frame->root, either directly or indirectly through STACK_UNWIND.
I've attached the logfiles for crash in client_lookup_cbk.
Here is the backtrace of crash in client_lookup_cbk.
#0 0xb7ebee48 in gf_print_trace (signum=11) at ../../../libglusterfs/src/common-utils.c:413
tmp = (call_frame_t *) 0xb02774cc
ctx = (glusterfs_ctx_t *) 0x8051008
trav = (struct list_head *) 0xb0277458
tm = (struct tm *) 0x0
msg = "frame : type(1) op(WRITE)\n", '\0' <repeats 997 times>
timestr = '\0' <repeats 255 times>
utime = 0
ret = 26
fd = 4
#1 <signal handler called>
No symbol table info available.
#2 0xb7e8f276 in pthread_spin_lock () from /lib/libpthread.so.0
No symbol table info available.
#3 0xb7ec5524 in inode_ctx_get2 (inode=0xa9, xlator=0x58a68b7, value1=0xad1ff158, value2=0xad1ff150)
at ../../../libglusterfs/src/inode.c:1213
ret = 0
#4 0xb74a3495 in client_lookup_cbk (frame=0xaa0d8287, hdr=0x8309188, hdrlen=504, iobuf=0x0)
at ../../../../../xlators/protocol/client/src/client-protocol.c:4897
stbuf = {st_dev = 5412414050074099713, __pad1 = 0, __st_ino = 0, st_mode = 16877, st_nlink = 3,
st_uid = 0, st_gid = 0, st_rdev = 0, __pad2 = 0, st_size = 4096, st_blksize = 4096, st_blocks = 16, st_atim = {
tv_sec = 1260236810, tv_nsec = 0}, st_mtim = {tv_sec = 1260176040, tv_nsec = 0}, st_ctim = {
tv_sec = 1260236804, tv_nsec = 0}, st_ino = 1}
postparent = {st_dev = 0, __pad1 = 0, __st_ino = 0, st_mode = 0, st_nlink = 0, st_uid = 0, st_gid = 0,
st_rdev = 0, __pad2 = 0, st_size = 0, st_blksize = 0, st_blocks = 0, st_atim = {tv_sec = 0, tv_nsec = 0},
st_mtim = {tv_sec = 0, tv_nsec = 0}, st_ctim = {tv_sec = 0, tv_nsec = 0}, st_ino = 0}
inode = (inode_t *) 0xa9
xattr = (dict_t *) 0x0
rsp = (gf_fop_lookup_rsp_t *) 0x83091f4
op_ret = -1
op_errno = 0
dict_len = 0
dictbuf = 0x0
ret = -1
gf_errno = 0
local = (client_local_t *) 0xa9d070b7
oldino = 0
oldgen = 0
__FUNCTION__ = "client_lookup_cbk"
#5 0xb74a79d2 in protocol_client_interpret (this=0x8058a68, trans=0x80941b8, hdr_p=0x8309188 "", hdrlen=504,
iobuf=0x0) at ../../../../../xlators/protocol/client/src/client-protocol.c:6507
ret = -1
frame = (call_frame_t *) 0xaa0d8287
hdr = (gf_hdr_common_t *) 0x8309188
callid = 236320
type = 4
op = 27
conn = (client_connection_t *) 0x80944c0
__FUNCTION__ = "protocol_client_interpret"
#6 0xb74a85ee in protocol_client_pollin (this=0x8058a68, trans=0x80941b8)
at ../../../../../xlators/protocol/client/src/client-protocol.c:6805
conf = (client_conf_t *) 0x8093ce0
ret = 0
iobuf = (struct iobuf *) 0x0
hdr = 0x8309188 ""
hdrlen = 504
#7 0xb74a8abd in notify (this=0x8058a68, event=2, data=0x80941b8)
at ../../../../../xlators/protocol/client/src/client-protocol.c:6924
i = 0
ret = -1
child_down = 1
was_not_down = 0
trans = (transport_t *) 0x80941b8
conn = (client_connection_t *) 0x0
conf = (client_conf_t *) 0x8093ce0
parent = (xlator_list_t *) 0x0
__FUNCTION__ = "notify"
#8 0xb7eb5720 in xlator_notify (xl=0x8058a68, event=2, data=0x80941b8) at ../../../libglusterfs/src/xlator.c:923
old_THIS = (xlator_t *) 0xb7ee7ba0
ret = 0
#9 0xb7ec2230 in transport_peerproc (trans_data=0x80941b8) at ../../../libglusterfs/src/transport.c:414
trans = (transport_t *) 0x80941b8
msg = (struct transport_msg *) 0x8174d70
#10 0xb7e8a383 in start_thread () from /lib/libpthread.so.0
No symbol table info available.
#11 0xb7e0f05e in clone () from /lib/libc.so.6
No symbol table info available.
(gdb) p *frame
$1 = {root = 0xab029000, parent = 0x66a58b7, next = 0x66a58aa, prev = 0xa6dac0aa, local = 0x0, this = 0x58a68b7,
ret = 0x44dd5408, ref_count = 183, lock = 256, cookie = 0x0, complete = _gf_false}
(gdb) p *frame->this
Cannot access memory at address 0x58a68b7
And here is the backtrace of crash in client_readdirp_cbk.
#0 0xb7553208 in client_readdirp_cbk (frame=0xa89072a7, hdr=0xa89c01e0, hdrlen=1316, iobuf=0x0)
at ../../../../../xlators/protocol/client/src/client-protocol.c:4461
fn = (ret_fn_t) 0x5093b008
_parent = (call_frame_t *) 0x994668a8
old_THIS = (xlator_t *) 0x0
rsp = (gf_fop_readdirp_rsp_t *) 0xa89c024c
op_ret = 11
op_errno = 0
buf_size = 1204
entries = {{list = {next = 0xa8702268, prev = 0xa8715bc0}, {next = 0xa8702268, prev = 0xa8715bc0}},
d_ino = 12773734187154396736, d_off = 7486, d_len = 2828271320, d_type = 3075854596, d_stat = {
st_dev = 12147639625727274734, __pad1 = 22120, __st_ino = 3086215132, st_mode = 2974114408,
st_nlink = 3075854656, st_uid = 1, st_gid = 2974116752, st_rdev = 13210695157743444568, __pad2 = 0,
st_size = -5673020022532510832, st_blksize = -1219209821, st_blocks = 134829584, st_atim = {tv_sec = 0,
tv_nsec = -1219209931}, st_mtim = {tv_sec = 7486, tv_nsec = 0}, st_ctim = {tv_sec = 0, tv_nsec = 4},
st_ino = 0}, d_name = 0xb1456248 "L\002\234¨\v"}
#1 0xb75589d2 in protocol_client_interpret (this=0x80580f8, trans=0x8095308, hdr_p=0xa89c01e0 "", hdrlen=1316,
iobuf=0x0) at ../../../../../xlators/protocol/client/src/client-protocol.c:6507
ret = -1
frame = (call_frame_t *) 0xa89072a7
hdr = (gf_hdr_common_t *) 0xa89c01e0
callid = 7486
type = 4
op = 44
conn = (client_connection_t *) 0x8095610
__FUNCTION__ = "protocol_client_interpret"
#2 0xb75595ee in protocol_client_pollin (this=0x80580f8, trans=0x8095308)
at ../../../../../xlators/protocol/client/src/client-protocol.c:6805
conf = (client_conf_t *) 0x8094e30
ret = 0
iobuf = (struct iobuf *) 0x0
hdr = 0xa89c01e0 ""
hdrlen = 1316
#3 0xb7559abd in notify (this=0x80580f8, event=2, data=0x8095308)
at ../../../../../xlators/protocol/client/src/client-protocol.c:6924
i = 0
ret = -1
child_down = 1
was_not_down = 0
trans = (transport_t *) 0x8095308
conn = (client_connection_t *) 0x0
conf = (client_conf_t *) 0x8094e30
parent = (xlator_list_t *) 0x0
__FUNCTION__ = "notify"
#4 0xb7f66720 in xlator_notify (xl=0x80580f8, event=2, data=0x8095308) at ../../../libglusterfs/src/xlator.c:923
old_THIS = (xlator_t *) 0xb7f98ba0
ret = 0
#5 0xb7f73230 in transport_peerproc (trans_data=0x8095308) at ../../../libglusterfs/src/transport.c:414
trans = (transport_t *) 0x8095308
msg = (struct transport_msg *) 0xa8989920
#6 0xb7f3b383 in start_thread () from /lib/libpthread.so.0
No symbol table info available.
#7 0xb7ec005e in clone () from /lib/libc.so.6
No symbol table info available.
(gdb) p *frame
$1 = {root = 0x99018000, parent = 0x994668a8, next = 0x994668a8, prev = 0x9901f4a8, local = 0xa8,
this = 0x580f800, ret = 0x5093b008, ref_count = 183, lock = 256, cookie = 0x0, complete = _gf_false}
(gdb) p *frame->this
Cannot access memory at address 0x580f800
I was running,
while true; do dbench -t 100 10; done
on the mount point.
This bug is no longer reproducible on latest git pull. Please reopen if the bug is found again. |
Created attachment 117 [details] sound module loaded at boot-time The logfile contains many runs and the crash is logged in the last but one run of glusterfs.