Login
[x]
Log in using an account from:
Fedora Account System
Red Hat Associate
Red Hat Customer
Or log in using a Red Hat Bugzilla account
Forgot Password
Login:
Hide Forgot
Create an Account
Red Hat Bugzilla – Attachment 892304 Details for
Bug 1001210
If rdma transport is used in cluster.conf, cman is unable to start after shutdown.
[?]
New
Simple Search
Advanced Search
My Links
Browse
Requests
Reports
Current State
Search
Tabular reports
Graphical reports
Duplicates
Other Reports
User Changes
Plotly Reports
Bug Status
Bug Severity
Non-Defaults
|
Product Dashboard
Help
Page Help!
Bug Writing Guidelines
What's new
Browser Support Policy
5.0.4.rh83 Release notes
FAQ
Guides index
User guide
Web Services
Contact
Legal
This site requires JavaScript to be enabled to function correctly; please enable it.
[patch]
Add multicast recovery to IB
07totemiba_mcast_error.patch (text/plain), 3.45 KB, created by
Yevheniy Demchenko
on 2014-05-04 18:12:16 UTC
(
hide
)
Description:
Add multicast recovery to IB
Filename:
MIME Type:
Creator:
Yevheniy Demchenko
Created:
2014-05-04 18:12:16 UTC
Size:
3.45 KB
patch
obsolete
>--- corosync-1.4.1/exec/totemiba.c.totemiba_mcast_error 2014-05-04 19:30:03.284706023 +0200 >+++ corosync-1.4.1/exec/totemiba.c 2014-05-04 19:33:02.689705831 +0200 >@@ -206,6 +206,10 @@ struct totemiba_instance { > struct list_head token_send_buf_head; > > struct list_head recv_token_recv_buf_head; >+ >+ int mcast_seen_joined; >+ >+ poll_timer_handle mcast_rejoin; > }; > union u { > uint64_t wr_id; >@@ -517,6 +521,26 @@ static int mcast_cq_recv_event_fn (hdb_h > return (0); > } > >+static void mcast_rejoin (struct totemiba_instance *instance) >+{ >+ int res; >+ res = rdma_leave_multicast (instance->mcast_cma_id, &instance->mcast_addr); >+ if (instance->mcast_ah) { >+ ibv_destroy_ah (instance->mcast_ah); >+ instance->mcast_ah = 0; >+ } >+ >+ res = rdma_join_multicast (instance->mcast_cma_id, &instance->mcast_addr, instance); >+ if (res != 0) { >+ log_printf (LOGSYS_LEVEL_DEBUG, "rdma_join_multicast failed, errno=%d, rejoining in 100ms\n",errno); >+ poll_timer_add (instance->totemiba_poll_handle, >+ 100, >+ (void *)instance, >+ mcast_rejoin, >+ &instance->mcast_rejoin); >+ } >+} >+ > static int mcast_rdma_event_fn (hdb_handle_t poll_handle, int fd, int events, void *context) > { > struct totemiba_instance *instance = (struct totemiba_instance *)context; >@@ -534,8 +558,16 @@ static int mcast_rdma_event_fn (hdb_hand > * occurs when we resolve the multicast address > */ > case RDMA_CM_EVENT_ADDR_RESOLVED: >- rdma_join_multicast (instance->mcast_cma_id, &instance->mcast_addr, instance); >+ res = rdma_join_multicast (instance->mcast_cma_id, &instance->mcast_addr, instance); > usleep(1000); >+ if (res == 0) break; >+ case RDMA_CM_EVENT_MULTICAST_ERROR: >+ log_printf (LOGSYS_LEVEL_ERROR, "multicast error, trying to rejoin in 100ms\n"); >+ poll_timer_add (instance->totemiba_poll_handle, >+ 100, >+ (void *)instance, >+ mcast_rejoin, >+ &instance->mcast_rejoin); > break; > /* > * occurs when the CM joins the multicast group >@@ -544,14 +576,15 @@ static int 
mcast_rdma_event_fn (hdb_hand > instance->mcast_qpn = event->param.ud.qp_num; > instance->mcast_qkey = event->param.ud.qkey; > instance->mcast_ah = ibv_create_ah (instance->mcast_pd, &event->param.ud.ah_attr); >- >- instance->totemiba_iface_change_fn (instance->rrp_context, &instance->my_id); >+ if (instance->mcast_seen_joined == 0) { >+ log_printf (LOGSYS_LEVEL_DEBUG, "joining mcast 1st time, running callbacks\n"); >+ instance->totemiba_iface_change_fn (instance->rrp_context, &instance->my_id); >+ instance->mcast_seen_joined=1; >+ } >+ log_printf (LOGSYS_LEVEL_DEBUG, "Joined multicast!\n"); > break; > case RDMA_CM_EVENT_ADDR_ERROR: > case RDMA_CM_EVENT_ROUTE_ERROR: >- case RDMA_CM_EVENT_MULTICAST_ERROR: >- log_printf (LOGSYS_LEVEL_ERROR, "multicast error\n"); >- break; > case RDMA_CM_EVENT_DEVICE_REMOVAL: > break; > default: >@@ -1438,8 +1471,9 @@ int totemiba_mcast_flush_send ( > sge.length = msg_len; > sge.lkey = send_buf->mr->lkey; > sge.addr = (uintptr_t)msg; >- >- res = ibv_post_send (instance->mcast_cma_id->qp, &send_wr, &failed_send_wr); >+ >+ if ( instance->mcast_ah != 0 ) >+ res = ibv_post_send (instance->mcast_cma_id->qp, &send_wr, &failed_send_wr); > return (res); > } > >@@ -1476,8 +1510,9 @@ int totemiba_mcast_noflush_send ( > sge.length = msg_len; > sge.lkey = send_buf->mr->lkey; > sge.addr = (uintptr_t)msg; >- >- res = ibv_post_send (instance->mcast_cma_id->qp, &send_wr, &failed_send_wr); >+ >+ if ( instance->mcast_ah != 0 ) >+ res = ibv_post_send (instance->mcast_cma_id->qp, &send_wr, &failed_send_wr); > return (res); > } >
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Diff
View Attachment As Raw
Actions:
View
|
Diff
Attachments on
bug 1001210
:
790643
|
790644
|
790645
|
790646
|
790647
|
793906
|
797602
|
797603
|
797604
|
797605
|
797606
|
797607
|
797619
|
797620
|
800375
|
800376
|
800377
|
892304
|
895478
|
895956
|
899961