Login
[x]
Log in using an account from:
Fedora Account System
Red Hat Associate
Red Hat Customer
Or log in using a Red Hat Bugzilla account
Forgot Password
Login:
Hide Forgot
Create an Account
Red Hat Bugzilla – Attachment 315052 Details for
Bug 460190
new option to delay fence_tool join
[?]
New
Simple Search
Advanced Search
My Links
Browse
Requests
Reports
Current State
Search
Tabular reports
Graphical reports
Duplicates
Other Reports
User Changes
Plotly Reports
Bug Status
Bug Severity
Non-Defaults
|
Product Dashboard
Help
Page Help!
Bug Writing Guidelines
What's new
Browser Support Policy
5.0.4.rh83 Release notes
FAQ
Guides index
User guide
Web Services
Contact
Legal
This site requires JavaScript to be enabled to function correctly; please enable it.
fence_tool patch
fence_tool-m.patch (text/plain), 5.52 KB, created by
David Teigland
on 2008-08-26 21:05:20 UTC
(
hide
)
Description:
fence_tool patch
Filename:
MIME Type:
Creator:
David Teigland
Created:
2008-08-26 21:05:20 UTC
Size:
5.52 KB
patch
obsolete
>commit 3f0735e35f5297c7c85279694d311033710988ca >Author: David Teigland <teigland@redhat.com> >Date: Tue Aug 26 15:50:49 2008 -0500 > > fence_tool: new option to delay before join > > bz 460190 > > Certain network/switch settings cause nodes to form partitioned clusters > when they start up. We can add code to better cope with these network > problems, since they seem to be somewhat common. The network partitions > are a particular problem for two_node clusters where a node has quorum > when it starts up on its own. > > Add new fence_tool option -m, e.g. fence_tool join -m <seconds>. > This will cause fence_tool to wait for all nodes in cluster.conf > to be cluster members, or the timeout (in seconds), whichever comes > first, before joining the fence domain. > > The idea is that we'd use this option to allow openais on the nodes > to all see each other before starting the fence domain. So we join the > domain *after* the nodes merge into a single cluster. If we joined the > domain *before* the cluster partition merged, then nodes end up being > fenced unnecessarily. (This is a similar idea to post_join_delay; a delay > that gives us time to determine that a node in an unknown state is > actually ok and doesn't require fencing.) 
> > Signed-off-by: David Teigland <teigland@redhat.com> > >diff --git a/fence/fence_tool/fence_tool.c b/fence/fence_tool/fence_tool.c >index a6b002a..0b7ea62 100644 >--- a/fence/fence_tool/fence_tool.c >+++ b/fence/fence_tool/fence_tool.c >@@ -37,10 +37,12 @@ > #define FALSE 0 > #endif > >-#define OPTION_STRING ("Vht:wQ") >+#define OPTION_STRING ("Vht:m:wQ") > #define FENCED_SOCK_PATH "fenced_socket" > #define MAXLINE 256 > >+#define MAX_NODES 128 >+ > #define OP_JOIN 1 > #define OP_LEAVE 2 > #define OP_WAIT 3 >@@ -63,9 +65,15 @@ char *prog_name; > int operation; > int child_wait = FALSE; > int quorum_wait = TRUE; >+int member_wait = 0; > int fenced_start_timeout = 300; /* five minutes */ > int signalled = 0; > cman_handle_t ch; >+int all_nodeids[MAX_NODES]; >+int all_nodeids_count; >+cman_node_t cman_nodes[MAX_NODES]; >+int cman_nodes_count; >+ > > static int do_write(int fd, void *buf, size_t count) > { >@@ -245,6 +253,77 @@ static int do_wait(int joining) > return -1; > } > >+static int all_nodeids_are_members(void) >+{ >+ int i, j, rv, found; >+ >+ cman_nodes_count = 0; >+ memset(&cman_nodes, 0, sizeof(cman_nodes)); >+ >+ rv = cman_get_nodes(ch, MAX_NODES, &cman_nodes_count, cman_nodes); >+ if (rv < 0) { >+ printf("cman_get_nodes error %d %d\n", rv, errno); >+ return 0; >+ } >+ >+ for (i = 0; i < all_nodeids_count; i++) { >+ found = 0; >+ >+ for (j = 0; j < cman_nodes_count; j++) { >+ if (cman_nodes[j].cn_nodeid == all_nodeids[i] && >+ cman_nodes[j].cn_member) { >+ found = 1; >+ break; >+ } >+ } >+ >+ if (!found) >+ return 0; >+ } >+ return 1; >+} >+ >+static void wait_for_members(void) >+{ >+ char path[256]; >+ char *nodeid_str; >+ int i = 0, cd, error; >+ >+ while ((cd = ccs_connect()) < 0) { >+ sleep(1); >+ if (++i > 9 && !(i % 10)) >+ printf("connect to ccs error %d %d\n", cd, errno); >+ } >+ >+ memset(all_nodeids, 0, sizeof(all_nodeids)); >+ all_nodeids_count = 0; >+ >+ for (i = 1; ; i++) { >+ nodeid_str = NULL; >+ memset(path, 0, 256); >+ sprintf(path, 
"/cluster/clusternodes/clusternode[%d]/@nodeid", i); >+ >+ error = ccs_get(cd, path, &nodeid_str); >+ if (error || !nodeid_str) >+ break; >+ >+ all_nodeids[all_nodeids_count++] = atoi(nodeid_str); >+ free(nodeid_str); >+ } >+ >+ ccs_disconnect(cd); >+ >+ for (i = 0; i < member_wait; i++) { >+ if (all_nodeids_are_members()) >+ break; >+ if (i && !(i % 5)) >+ printf("Waiting for all %d nodes to be members\n", >+ all_nodeids_count); >+ sleep(1); >+ } >+ >+} >+ > static int do_join(int argc, char *argv[]) > { > int i, fd, rv; >@@ -264,6 +343,10 @@ static int do_join(int argc, char *argv[]) > cman_finish(ch); > return EXIT_FAILURE; > } >+ >+ if (member_wait) >+ wait_for_members(); >+ > cman_finish(ch); > > i = 0; >@@ -361,10 +444,12 @@ static void print_usage(void) > printf(" dump Dump debug buffer from fenced\n"); > printf("\n"); > printf("Options:\n"); >+ printf(" -m <n> Delay join up to n seconds for all nodes in cluster.conf\n"); >+ printf(" to be cluster members\n"); > printf(" -w Wait for join to complete\n"); > printf(" -V Print program version information, then exit\n"); > printf(" -h Print this help, then exit\n"); >- printf(" -t Maximum time in seconds to wait\n"); >+ printf(" -t <n> Maximum time in seconds to wait\n"); > printf(" -Q Fail if cluster is not quorate, don't wait\n"); > printf("\n"); > } >@@ -399,6 +484,10 @@ static void decode_arguments(int argc, char *argv[]) > child_wait = TRUE; > break; > >+ case 'm': >+ member_wait = atoi(optarg); >+ break; >+ > case ':': > case '?': > fprintf(stderr, "Please use '-h' for usage.\n"); >diff --git a/fence/man/fence_tool.8 b/fence/man/fence_tool.8 >index a7ad0c4..7477f6f 100644 >--- a/fence/man/fence_tool.8 >+++ b/fence/man/fence_tool.8 >@@ -27,6 +27,9 @@ it to stdout. > > .SH OPTIONS > .TP >+\fB-m\fP <n> >+Delay join up to n seconds for all nodes in cluster.conf to be cluster members. >+.TP > \fB-w\fP > Wait until the join or leave is completed. > .TP >@@ -36,8 +39,8 @@ Help. Print out the usage syntax. 
> \fB-V\fP > Print version information. > .TP >-\fB-t\fP >-Maximum time in seconds to wait (default: 300 seconds) >+\fB-t\fP <n> >+Maximum time in seconds to wait for quorum or -w (default: 300 seconds) > .TP > \fB-Q\fP > Fail command immediately if the cluster is not quorate, don't wait.
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Raw
Actions:
View
Attachments on
bug 460190
: 315052 |
315053