Bug 1201613
| Summary: | RDMA: Crash seen during smallfile read test. | |||
|---|---|---|---|---|
| Product: | [Red Hat Storage] Red Hat Gluster Storage | Reporter: | Ben Turner <bturner> | |
| Component: | rdma | Assignee: | Mohammed Rafi KC <rkavunga> | |
| Status: | CLOSED ERRATA | QA Contact: | Ben Turner <bturner> | |
| Severity: | urgent | Docs Contact: | ||
| Priority: | high | |||
| Version: | rhgs-3.0 | CC: | aavati, asrivast, nlevinki, rkavunga, rtalur, rwheeler, vagarwal | |
| Target Milestone: | --- | Keywords: | ZStream | |
| Target Release: | RHGS 3.0.4 | |||
| Hardware: | x86_64 | |||
| OS: | Linux | |||
| Whiteboard: | ||||
| Fixed In Version: | glusterfs-3.6.0.52-1 | Doc Type: | Bug Fix | |
| Doc Text: | Story Points: | --- | ||
| Clone Of: | ||||
| : | 1201621 (view as bug list) | Environment: | ||
| Last Closed: | 2015-03-26 06:37:01 UTC | Type: | Bug | |
| Regression: | --- | Mount Type: | --- | |
| Documentation: | --- | CRM: | ||
| Verified Versions: | Category: | --- | ||
| oVirt Team: | --- | RHEL 7.3 requirements from Atomic Host: | ||
| Cloudforms Team: | --- | Target Upstream Version: | ||
| Embargoed: | ||||
| Bug Depends On: | ||||
| Bug Blocks: | 1182947, 1201621 | |||
(gdb) bt full
#0 0x00007f04626fd6b0 in __gf_rdma_deregister_mr (context=0x7f0450009798) at rdma.c:1667
tmp = 0xbabebabe
i = <value optimized out>
found = 0
#1 __gf_rdma_request_context_destroy (context=0x7f0450009798) at rdma.c:1733
peer = 0x7f045c05fce0
priv = <value optimized out>
device = 0x7f0450000b70
ret = 0
__FUNCTION__ = "__gf_rdma_request_context_destroy"
#2 0x00007f04626fe188 in gf_rdma_pollin_notify (peer=0x7f045c05fce0, post=<value optimized out>) at rdma.c:3712
ret = <value optimized out>
msg_type = <value optimized out>
rpc_req = <value optimized out>
request_context = 0x7f0450009798
request_info = {xid = 34619545, prognum = 1298437, progver = 330, procnum = 27, rpc_req = 0x7f0460e935a4, rsp = {rsphdr = 0x0,
rsphdr_count = 0, rsp_payload = 0x0, rsp_payload_count = 0, rsp_iobref = 0x0}}
priv = 0x7f045c05fcd0
ptr = <value optimized out>
pollin = 0x7f0430741e50
__FUNCTION__ = "gf_rdma_pollin_notify"
#3 0x00007f04626fe431 in gf_rdma_recv_reply (peer=0x7f045c05fce0, post=0x7f0450f956e0) at rdma.c:3813
ret = 0
header = <value optimized out>
reply_info = 0x7f045001247c
wc_array = <value optimized out>
i = <value optimized out>
ptr = <value optimized out>
ctx = <value optimized out>
request_info = {xid = 34619545, prognum = 1298437, progver = 330, procnum = 27, rpc_req = 0x7f0460e935a4, rsp = {rsphdr = 0x0,
rsphdr_count = 0, rsp_payload = 0x0, rsp_payload_count = 0, rsp_iobref = 0x0}}
rpc_req = <value optimized out>
__FUNCTION__ = "gf_rdma_recv_reply"
#4 0x00007f04626fe83b in gf_rdma_process_recv (peer=0x7f045c05fce0, wc=<value optimized out>) at rdma.c:3946
post = 0x7f0450f956e0
readch = 0x0
ret = <value optimized out>
ptr = <value optimized out>
msg_type = <value optimized out>
header = 0x7f0452cea000
priv = 0x7f045c05fcd0
__FUNCTION__ = "gf_rdma_process_recv"
#5 0x00007f04626fea25 in gf_rdma_recv_completion_proc (data=0x7f0450018cb0) at rdma.c:4083
chan = <value optimized out>
device = 0x7f0450000b70
post = 0x7f0450f956e0
peer = <value optimized out>
event_cq = 0x7f0450018cd0
wc = {{wr_id = 139656515114720, status = IBV_WC_SUCCESS, opcode = IBV_WC_RECV, vendor_err = 0, byte_len = 52, imm_data = 0,
qp_num = 132, src_qp = 1, wc_flags = 0, pkey_index = 0, slid = 5, sl = 0 '\000', dlid_path_bits = 0 '\000'}, {
wr_id = 139656515115440, status = IBV_WC_SUCCESS, opcode = IBV_WC_RECV, vendor_err = 0, byte_len = 64, imm_data = 0,
qp_num = 132, src_qp = 1, wc_flags = 0, pkey_index = 0, slid = 5, sl = 0 '\000', dlid_path_bits = 0 '\000'}, {
wr_id = 139656515162224, status = IBV_WC_SUCCESS, opcode = IBV_WC_RECV, vendor_err = 0, byte_len = 176, imm_data = 0,
qp_num = 132, src_qp = 1, wc_flags = 0, pkey_index = 0, slid = 5, sl = 0 '\000', dlid_path_bits = 0 '\000'}, {
wr_id = 139656515180768, status = IBV_WC_SUCCESS, opcode = IBV_WC_RECV, vendor_err = 0, byte_len = 192, imm_data = 0,
qp_num = 132, src_qp = 1, wc_flags = 0, pkey_index = 0, slid = 5, sl = 0 '\000', dlid_path_bits = 0 '\000'}, {
wr_id = 139656515213152, status = IBV_WC_SUCCESS, opcode = IBV_WC_RECV, vendor_err = 0, byte_len = 72, imm_data = 0,
qp_num = 130, src_qp = 1, wc_flags = 0, pkey_index = 0, slid = 4, sl = 0 '\000', dlid_path_bits = 0 '\000'}, {
wr_id = 139656515542080, status = IBV_WC_SUCCESS, opcode = IBV_WC_RECV, vendor_err = 0, byte_len = 72, imm_data = 0,
qp_num = 130, src_qp = 1, wc_flags = 0, pkey_index = 0, slid = 4, sl = 0 '\000', dlid_path_bits = 0 '\000'}, {
wr_id = 139656517359392, status = IBV_WC_SUCCESS, opcode = IBV_WC_RECV, vendor_err = 0, byte_len = 192, imm_data = 0,
qp_num = 132, src_qp = 1, wc_flags = 0, pkey_index = 0, slid = 5, sl = 0 '\000', dlid_path_bits = 0 '\000'}, {
wr_id = 139656517358736, status = IBV_WC_SUCCESS, opcode = IBV_WC_RECV, vendor_err = 0, byte_len = 176, imm_data = 0,
qp_num = 132, src_qp = 1, wc_flags = 0, pkey_index = 0, slid = 5, sl = 0 '\000', dlid_path_bits = 0 '\000'}, {
wr_id = 139656517358208, status = IBV_WC_SUCCESS, opcode = IBV_WC_RECV, vendor_err = 0, byte_len = 176, imm_data = 0,
qp_num = 132, src_qp = 1, wc_flags = 0, pkey_index = 0, slid = 5, sl = 0 '\000', dlid_path_bits = 0 '\000'}, {
wr_id = 139656517357680, status = IBV_WC_SUCCESS, opcode = IBV_WC_RECV, vendor_err = 0, byte_len = 176, imm_data = 0,
qp_num = 130, src_qp = 1, wc_flags = 0, pkey_index = 0, slid = 4, sl = 0 '\000', dlid_path_bits = 0 '\000'}}
event_ctx = 0x7f0450000b70
ret = <value optimized out>
num_wr = 1
index = <value optimized out>
---Type <return> to continue, or q <return> to quit---
failed = 0 '\000'
__FUNCTION__ = "gf_rdma_recv_completion_proc"
#6 0x0000003acfc079d1 in start_thread (arg=0x7f0455fff700) at pthread_create.c:301
__res = <value optimized out>
pd = 0x7f0455fff700
now = <value optimized out>
unwind_buf = {cancel_jmp_buf = {{jmp_buf = {139656599435008, 3437653819718065564, 252595782496, 139656599435712, 0, 3,
-3333697194911900260, 3441137622062161308}, mask_was_saved = 0}}, priv = {pad = {0x0, 0x0, 0x0, 0x0}, data = {prev = 0x0,
cleanup = 0x0, canceltype = 0}}}
not_first_call = <value optimized out>
pagesize_m1 = <value optimized out>
sp = <value optimized out>
freesize = <value optimized out>
#7 0x0000003acf8e8b6d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:115
Upstream patch: http://review.gluster.org/#/c/9872/

Verified on 3.6.0.52.

Since the problem described in this bug report should be resolved in a recent advisory, it has been closed with a resolution of ERRATA. For information on the advisory, and where to find the updated files, follow the link below. If the solution does not work for you, open a new bug report.

https://rhn.redhat.com/errata/RHBA-2015-0682.html
Description of problem:
During a smallfile read test I saw a crash in:

Program terminated with signal 11, Segmentation fault.
#0 0x00007f04626fd6b0 in __gf_rdma_deregister_mr (context=0x7f0450009798) at rdma.c:1667
1667 if (tmp->mr == mr[i]) {

Version-Release number of selected component (if applicable):
3.6.0.51

How reproducible:
1 in 25 runs so far.

Steps to Reproduce:
1. Run smallfile reads over RDMA mount
2.
3.

Actual results:
Crash

Expected results:
No crash

Additional info: