Login
[x]
Log in using an account from:
Fedora Account System
Red Hat Associate
Red Hat Customer
Or login using a Red Hat Bugzilla account
Forgot Password
Login:
Hide Forgot
Create an Account
Red Hat Bugzilla – Attachment 144741 Details for
Bug 201396
clusvcadm hangs if node processing request dies
[?]
New
Simple Search
Advanced Search
My Links
Browse
Requests
Reports
Current State
Search
Tabular reports
Graphical reports
Duplicates
Other Reports
User Changes
Plotly Reports
Bug Status
Bug Severity
Non-Defaults
|
Product Dashboard
Help
Page Help!
Bug Writing Guidelines
What's new
Browser Support Policy
5.0.4.rh83 Release notes
FAQ
Guides index
User guide
Web Services
Contact
Legal
This site requires JavaScript to be enabled to function correctly, please enable it.
[patch]
Makes clusvcadm / rgmanager produce an error if the node dies while handling a request.
rgmanager-rhel4-nodedeath.patch (text/plain), 5.17 KB, created by
Lon Hohberger
on 2007-01-03 21:04:57 UTC
(
hide
)
Description:
Makes clusvcadm / rgmanager produce an error if the node dies while handling a request.
Filename:
MIME Type:
Creator:
Lon Hohberger
Created:
2007-01-03 21:04:57 UTC
Size:
5.17 KB
patch
obsolete
>### Eclipse Workspace Patch 1.0 >#P cluster-RHEL4 >Index: rgmanager/src/utils/clusvcadm.c >=================================================================== >RCS file: /cvs/cluster/cluster/rgmanager/src/utils/clusvcadm.c,v >retrieving revision 1.2.2.7 >diff -u -r1.2.2.7 clusvcadm.c >--- rgmanager/src/utils/clusvcadm.c 13 Dec 2006 18:19:56 -0000 1.2.2.7 >+++ rgmanager/src/utils/clusvcadm.c 3 Jan 2007 20:59:10 -0000 >@@ -147,6 +147,43 @@ > } > > >+int >+do_msg_receive(uint64_t msgtarget, int fd, void *buf, size_t len) >+{ >+ int ret; >+ cluster_member_list_t *m = NULL; >+ >+ if ((int64_t)msgtarget < (int64_t)0) >+ return msg_receive(fd, buf, len); >+ >+ /* Make sure a node hasn't died while processing our request. */ >+ do { >+ ret = msg_receive_timeout(fd, buf, len, 10); >+ if (ret < (int)len) { >+ if (ret < 0 && errno == ETIMEDOUT) { >+ m = clu_member_list(RG_SERVICE_GROUP); >+ if (!memb_online(m, msgtarget)) { >+ ret = RG_ENODEDEATH; >+ break; >+ } >+ cml_free(m); >+ m = NULL; >+ continue; >+ } >+ >+ /* Make sure we don't overwrite ENODEDEATH */ >+ if (ret < 0) >+ ret = -1; >+ } >+ break; >+ } while(1); >+ >+ if (m) >+ cml_free(m); >+ return ret; >+} >+ >+ > void > usage(char *name) > { >@@ -259,7 +296,6 @@ > usage(basename(argv[0])); > return 1; > } >- > > /* No login */ > fd = clu_connect(RG_SERVICE_GROUP, 0); >@@ -294,10 +330,15 @@ > fflush(stdout); > msgfd = msg_open(msgtarget, RG_PORT, 0, 5); > } else { >- printf("Trying to relocate %s to %s", svcname, nodename); >+ if (node_specified) >+ printf("Trying to relocate %s to %s", svcname, nodename); >+ else >+ printf("Trying to relocate %s", svcname); > printf("..."); > fflush(stdout); > msgfd = msg_open(me, RG_PORT, 0, 5); >+ /* just do a normal receive from the local node */ >+ msgtarget = (uint64_t)-1; > } > > if (msgfd < 0) { >@@ -312,10 +353,25 @@ > return 1; > } > >- if (msg_receive(msgfd, &msg, sizeof(msg)) != sizeof(msg)) { >- perror("msg_receive"); >- fprintf(stderr, "Error receiving reply!\n"); >- 
return 1; >+ /* reusing opt */ >+ opt = do_msg_receive(msgtarget, msgfd, &msg, >+ sizeof(msg)); >+ if (opt < (int)sizeof(msg)) { >+ if (opt != RG_ENODEDEATH) { >+ perror("msg_receive"); >+ fprintf(stderr, "Error receiving reply!\n"); >+ return 1; >+ } >+ >+ /* >+ * XXX hack to enable node death processing along side >+ * all the rest of the possible responses. If an end-node >+ * died while processing, this will have been set by the >+ * rgmanager and a response with RG_ENODEDEATH as the d_ret >+ * would have been received. >+ */ >+ msg.sm_data.d_ret = RG_ENODEDEATH; >+ swab_SmMessageSt(&msg); > } > > /* Decode */ >@@ -346,6 +402,10 @@ > case RG_EFAIL: > printf("failed\n"); > break; >+ case RG_ENODEDEATH: >+ printf("node processing request died\n"); >+ printf("(Status unknown)\n"); >+ break; > case RG_EABORT: > printf("cancelled by resource manager\n"); > break; >Index: rgmanager/include/resgroup.h >=================================================================== >RCS file: /cvs/cluster/cluster/rgmanager/include/resgroup.h,v >retrieving revision 1.3.2.8 >diff -u -r1.3.2.8 resgroup.h >--- rgmanager/include/resgroup.h 13 Dec 2006 18:19:57 -0000 1.3.2.8 >+++ rgmanager/include/resgroup.h 3 Jan 2007 20:59:10 -0000 >@@ -156,6 +156,7 @@ > cluster_member_list_t *member_list(void); > uint64_t my_id(void); > >+#define RG_ENODEDEATH -8 /* Processing node died */ > #define RG_ERUN -7 /* Service is running already */ > #define RG_EAGAIN -6 /* Try again */ > #define RG_EDEADLCK -5 /* Operation would cause deadlock */ >Index: rgmanager/src/daemons/rg_forward.c >=================================================================== >RCS file: /cvs/cluster/cluster/rgmanager/src/daemons/rg_forward.c,v >retrieving revision 1.2.2.2 >diff -u -r1.2.2.2 rg_forward.c >--- rgmanager/src/daemons/rg_forward.c 13 Dec 2006 18:19:56 -0000 1.2.2.2 >+++ rgmanager/src/daemons/rg_forward.c 3 Jan 2007 20:59:10 -0000 >@@ -48,8 +48,9 @@ > rg_state_t rgs; > request_t *req = (request_t *)arg; > void 
*lockp; >- int fd; >+ int fd, ret; > SmMessageSt msg; >+ cluster_member_list_t *m = NULL; > > if (rg_lock(req->rr_group, &lockp) != 0) { > msg_close(req->rr_resp_fd); >@@ -88,19 +89,43 @@ > pthread_exit(NULL); > } > >- if (msg_receive(fd, &msg, sizeof(msg)) != sizeof(msg)) { >- msg_close(fd); >- msg_close(req->rr_resp_fd); >- rq_free(req); >- pthread_exit(NULL); >- } >+ /* >+ * Ok, we're forwarding a message to another node. Keep tabs on >+ * the node to make sure it doesn't die. Basically, wake up every >+ * now and again to make sure it's still online. If it isn't, send >+ * a response back to the caller. >+ */ >+ do { >+ ret = msg_receive_timeout(fd, &msg, sizeof(msg), 10); >+ if (ret < (int)sizeof(msg)) { >+ if (ret < 0 && errno == ETIMEDOUT) { >+ m = member_list(); >+ if (!memb_online(m, rgs.rs_owner)) { >+ msg.sm_data.d_ret = RG_ENODEDEATH; >+ /* we decode down below, >+ * so encode here */ >+ swab_SmMessageSt(&msg); >+ break; >+ } >+ cml_free(m); >+ m = NULL; >+ continue; >+ } >+ msg_close(fd); >+ msg_close(req->rr_resp_fd); >+ goto out; >+ } >+ break; >+ } while(1); >+ >+ if (m) >+ cml_free(m); > msg_close(fd); > > swab_SmMessageSt(&msg); > send_response(msg.sm_data.d_ret, req->rr_target, req); >- >+out: > rq_free(req); >- > pthread_exit(NULL); > } >
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Diff
View Attachment As Raw
Actions:
View
|
Diff
Attachments on
bug 201396
:
144739
| 144741