Login
[x]
Log in using an account from:
Fedora Account System
Red Hat Associate
Red Hat Customer
Or log in using a Red Hat Bugzilla account
Forgot Password
Login:
Hide Forgot
Create an Account
Red Hat Bugzilla – Attachment 155835 Details for
Bug 229650
Restart only the failed resource and its dependencies instead of the whole service.
[?]
New
Simple Search
Advanced Search
My Links
Browse
Requests
Reports
Current State
Search
Tabular reports
Graphical reports
Duplicates
Other Reports
User Changes
Plotly Reports
Bug Status
Bug Severity
Non-Defaults
|
Product Dashboard
Help
Page Help!
Bug Writing Guidelines
What's new
Browser Support Policy
5.0.4.rh83 Release notes
FAQ
Guides index
User guide
Web Services
Contact
Legal
This site requires JavaScript to be enabled to function correctly; please enable it.
[patch]
Updated patch; fixes corner case
rgmanager-229650.patch (text/plain), 13.60 KB, created by
Lon Hohberger
on 2007-05-31 18:57:44 UTC
(
hide
)
Description:
Updated patch; fixes corner case
Filename:
MIME Type:
Creator:
Lon Hohberger
Created:
2007-05-31 18:57:44 UTC
Size:
13.60 KB
patch
obsolete
>Index: include/reslist.h >=================================================================== >RCS file: /cvs/cluster/cluster/rgmanager/include/reslist.h,v >retrieving revision 1.15.2.2 >diff -u -r1.15.2.2 reslist.h >--- include/reslist.h 23 Mar 2007 00:06:34 -0000 1.15.2.2 >+++ include/reslist.h 31 May 2007 18:56:30 -0000 >@@ -35,6 +35,8 @@ > #define RF_NEEDSTART (1<<2) /** Used when adding/changing resources */ > #define RF_NEEDSTOP (1<<3) /** Used when deleting/changing resources */ > #define RF_COMMON (1<<4) /** " */ >+#define RF_INDEPENDENT (1<<5) /** Define this for a resource if it is >+ otherwise an independent subtree */ > > #define RES_STOPPED (0) > #define RES_STARTED (1) >@@ -56,10 +58,10 @@ > > > typedef struct _resource_attribute { >- int ra_flags; >- /* XXX possible alignment problem on ia64 */ > char *ra_name; > char *ra_value; >+ int ra_flags; >+ int _pad_; > } resource_attr_t; > > >@@ -78,6 +80,7 @@ > time_t ra_last; > time_t ra_interval; > int ra_depth; >+ int _pad_; > } resource_act_t; > > >Index: src/daemons/groups.c >=================================================================== >RCS file: /cvs/cluster/cluster/rgmanager/src/daemons/groups.c,v >retrieving revision 1.25.2.5 >diff -u -r1.25.2.5 groups.c >--- src/daemons/groups.c 10 May 2007 16:23:43 -0000 1.25.2.5 >+++ src/daemons/groups.c 31 May 2007 18:56:30 -0000 >@@ -813,6 +813,7 @@ > } > pthread_rwlock_unlock(&resource_lock); > >+#if 0 > /* > Do NOT return error codes if we failed to stop for one of these > reasons. 
It didn't start, either, so it's safe to assume that >@@ -830,6 +831,7 @@ > break; > } > } >+#endif > > return ret; > } >Index: src/daemons/resrules.c >=================================================================== >RCS file: /cvs/cluster/cluster/rgmanager/src/daemons/resrules.c,v >retrieving revision 1.16.2.4 >diff -u -r1.16.2.4 resrules.c >--- src/daemons/resrules.c 31 May 2007 18:37:50 -0000 1.16.2.4 >+++ src/daemons/resrules.c 31 May 2007 18:56:31 -0000 >@@ -262,6 +262,7 @@ > acts[0].ra_depth = depth; > acts[0].ra_timeout = timeout; > acts[0].ra_interval = interval; >+ acts[0].ra_last = 0; > acts[1].ra_name = NULL; > > *actsp = acts; >@@ -271,7 +272,7 @@ > for (x = 0; acts[x].ra_name; x++) { > if (!strcmp(acts[x].ra_name, name) && > (depth == acts[x].ra_depth || depth == -1)) { >- printf("Replacing action '%s' depth %d: ", >+ fprintf(stderr, "Replacing action '%s' depth %d: ", > name, acts[x].ra_depth); > if (timeout >= 0) { > printf("timeout: %d->%d ", >@@ -306,6 +307,7 @@ > acts[x].ra_depth = depth; > acts[x].ra_timeout = timeout; > acts[x].ra_interval = interval; >+ acts[x].ra_last = 0; > > acts[x+1].ra_name = NULL; > >Index: src/daemons/restree.c >=================================================================== >RCS file: /cvs/cluster/cluster/rgmanager/src/daemons/restree.c,v >retrieving revision 1.23.2.3 >diff -u -r1.23.2.3 restree.c >--- src/daemons/restree.c 3 May 2007 15:14:16 -0000 1.23.2.3 >+++ src/daemons/restree.c 31 May 2007 18:56:31 -0000 >@@ -39,6 +39,9 @@ > void malloc_zap_mutex(void); > #endif > >+#define FL_FAILURE 0x1 >+#define FL_RECOVERABLE 0x2 >+ > > /* XXX from resrules.c */ > int store_childtype(resource_child_t **childp, char *name, int start, >@@ -507,6 +510,19 @@ > node->rn_resource = curres; > node->rn_state = RES_STOPPED; > node->rn_actions = (resource_act_t *)act_dup(curres->r_actions); >+ >+ snprintf(tok, sizeof(tok), "%s/@__independent_subtree", base); >+#ifndef NO_CCS >+ if (ccs_get(ccsfd, tok, &ref) == 0) { >+#else >+ 
if (conf_get(tok, &ref) == 0) { >+#endif >+ if (atoi(ref) > 0 || strcasecmp(ref, "yes") == 0) >+ node->rn_flags |= RF_INDEPENDENT; >+ free(ref); >+ } >+ >+ > curres->r_refs++; > > *newnode = node; >@@ -718,7 +734,6 @@ > resource_rule_t **rulelist, > resource_t **reslist) > { >- resource_rule_t *curr; > resource_node_t *root = NULL; > char tok[512]; > >@@ -777,6 +792,8 @@ > printf("NEEDSTART "); > if (node->rn_flags & RF_COMMON) > printf("COMMON "); >+ if (node->rn_flags & RF_INDEPENDENT) >+ printf("INDEPENDENT "); > printf("]"); > } > printf(" {\n"); >@@ -838,10 +855,11 @@ > #endif > > /* Do op on all children at our level */ >- rv += _res_op(&node->rn_child, first, >+ rv |= _res_op(&node->rn_child, first, > rule->rr_childtypes[x].rc_name, > ret, op); >- if (rv != 0 && op != RS_STOP) >+ >+ if (rv & FL_FAILURE && op != RS_STOP) > return rv; > } > >@@ -853,46 +871,6 @@ > } > > >-#if 0 >-static inline int >-_do_child_default_level(resource_node_t **tree, resource_t *first, >- void *ret, int op) >-{ >- resource_node_t *node = *tree; >- resource_t *res = node->rn_resource; >- resource_rule_t *rule = res->r_rule; >- int x, rv = 0, lev; >- >- for (x = 0; rule->rr_childtypes && >- rule->rr_childtypes[x].rc_name; x++) { >- >- if(op == RS_STOP) >- lev = rule->rr_childtypes[x].rc_stoplevel; >- else >- lev = rule->rr_childtypes[x].rc_startlevel; >- >- if (lev) >- continue; >- >- /* >- printf("%s children of %s type %s (default level)\n", >- agent_op_str(op), >- node->rn_resource->r_rule->rr_type, >- rule->rr_childtypes[x].rc_name); >- */ >- >- rv = _res_op(&node->rn_child, first, >- rule->rr_childtypes[x].rc_name, >- ret, op); >- if (rv != 0) >- return rv; >- } >- >- return 0; >-} >-#endif >- >- > static inline int > _xx_child_internal(resource_node_t *node, resource_t *first, > resource_node_t *child, void *ret, int op) >@@ -926,13 +904,14 @@ > > if (op == RS_START || op == RS_STATUS) { > list_for(&node->rn_child, child, y) { >- rv = _xx_child_internal(node, first, child, 
ret, op); >- if (rv) >+ rv |= _xx_child_internal(node, first, child, ret, op); >+ >+ if (rv & FL_FAILURE) > return rv; > } > } else { > list_for_rev(&node->rn_child, child, y) { >- rv += _xx_child_internal(node, first, child, ret, op); >+ rv |= _xx_child_internal(node, first, child, ret, op); > } > } > >@@ -973,7 +952,7 @@ > > if (op == RS_START || op == RS_STATUS) { > rv = _do_child_levels(tree, first, ret, op); >- if (rv != 0) >+ if (rv & FL_FAILURE) > return rv; > > /* Start default level after specified ones */ >@@ -992,6 +971,22 @@ > } > > >+void >+mark_nodes(resource_node_t *node, int state, int flags) >+{ >+ int x; >+ resource_node_t *child; >+ >+ list_for(&node->rn_child, child, x) { >+ if (child->rn_child) >+ mark_nodes(child->rn_child, state, flags); >+ } >+ >+ node->rn_state = state; >+ node->rn_flags |= (RF_NEEDSTART | RF_NEEDSTOP); >+} >+ >+ > /** > Do a status on a resource node. This takes into account the last time the > status operation was run and selects the highest possible resource depth >@@ -1123,130 +1118,6 @@ > in the subtree). > @see _res_op_by_level res_exec > */ >-#if 0 >-int >-_res_op(resource_node_t **tree, resource_t *first, >- char *type, void * __attribute__((unused))ret, int realop) >-{ >- int rv, me; >- resource_node_t *node; >- int op; >- >- list_do(tree, node) { >- >- /* Restore default operation. */ >- op = realop; >- >- /* If we're starting by type, do that funky thing. */ >- if (type && strlen(type) && >- strcmp(node->rn_resource->r_rule->rr_type, type)) >- continue; >- >- /* If the resource is found, all nodes in the subtree must >- have the operation performed as well. 
*/ >- me = !first || (node->rn_resource == first); >- >- /* >- printf("begin %s: %s %s [0x%x]\n", agent_op_str(op), >- node->rn_resource->r_rule->rr_type, >- primary_attr_value(node->rn_resource), >- node->rn_flags); >- */ >- >- if (me) { >- /* >- If we've been marked as a node which >- needs to be started or stopped, clear >- that flag and start/stop this resource >- and all resource babies. >- >- Otherwise, don't do anything; look for >- children with RF_NEEDSTART and >- RF_NEEDSTOP flags. >- >- CONDSTART and CONDSTOP are no-ops if >- the appropriate flag is not set. >- */ >- if ((op == RS_CONDSTART) && >- (node->rn_flags & RF_NEEDSTART)) { >- /* >- printf("Node %s:%s - CONDSTART\n", >- node->rn_resource->r_rule->rr_type, >- primary_attr_value(node->rn_resource)); >- */ >- op = RS_START; >- } >- >- if ((op == RS_CONDSTOP) && >- (node->rn_flags & RF_NEEDSTOP)) { >- /* >- printf("Node %s:%s - CONDSTOP\n", >- node->rn_resource->r_rule->rr_type, >- primary_attr_value(node->rn_resource)); >- */ >- op = RS_STOP; >- } >- } >- >- /* Start starts before children */ >- if (me && (op == RS_START)) { >- node->rn_flags &= ~RF_NEEDSTART; >- >- rv = res_exec(node, agent_op_str(op), NULL, 0); >- if (rv != 0) { >- node->rn_state = RES_FAILED; >- return rv; >- } >- >- set_time("start", 0, node); >- clear_checks(node); >- >- if (node->rn_state != RES_STARTED) { >- ++node->rn_resource->r_incarnations; >- node->rn_state = RES_STARTED; >- } >- } >- >- if (node->rn_child) { >- rv = _res_op_by_level(&node, me?NULL:first, ret, op); >- if (rv != 0) >- return rv; >- } >- >- /* Stop/status/etc stops after children have stopped */ >- if (me && (op == RS_STOP)) { >- node->rn_flags &= ~RF_NEEDSTOP; >- rv = res_exec(node, agent_op_str(op), NULL, 0); >- >- if (rv != 0) { >- node->rn_state = RES_FAILED; >- return rv; >- } >- >- if (node->rn_state != RES_STOPPED) { >- --node->rn_resource->r_incarnations; >- node->rn_state = RES_STOPPED; >- } >- >- } else if (me && (op == RS_STATUS)) { >- >- rv = 
do_status(node); >- if (rv != 0) >- return rv; >- } >- >- /* >- printf("end %s: %s %s\n", agent_op_str(op), >- node->rn_resource->r_rule->rr_type, >- primary_attr_value(node->rn_resource)); >- */ >- } while (!list_done(tree, node)); >- >- return 0; >-} >-#endif >- >- > static inline int > _res_op_internal(resource_node_t **tree, resource_t *first, > char *type, void *__attribute__((unused))ret, int realop, >@@ -1309,7 +1180,7 @@ > rv = res_exec(node, agent_op_str(op), NULL, 0); > if (rv != 0) { > node->rn_state = RES_FAILED; >- return rv; >+ return FL_FAILURE; > } > > set_time("start", 0, node); >@@ -1322,14 +1193,43 @@ > } else if (me && (op == RS_STATUS)) { > /* Check status before children*/ > rv = do_status(node); >- if (rv != 0) >- return rv; >+ if (rv != 0) { >+ /* >+ If this node's status has failed, all of its >+ dependent children are failed, whether or not this >+ node is independent or not. >+ */ >+ mark_nodes(node, RES_FAILED, >+ RF_NEEDSTART | RF_NEEDSTOP); >+ >+ /* If we're an independent subtree, return a flag >+ stating that this section is recoverable apart >+ from siblings in the resource tree. All child >+ resources of this node must be restarted, >+ but siblings of this node are not affected. */ >+ if (node->rn_flags & RF_INDEPENDENT) >+ return FL_RECOVERABLE; >+ >+ return FL_FAILURE; >+ } >+ > } > > if (node->rn_child) { > rv = _res_op_by_level(&node, me?NULL:first, ret, op); >- if (rv != 0) >- return rv; >+ if (rv != 0) { >+ mark_nodes(node, RES_FAILED, >+ RF_NEEDSTART | RF_NEEDSTOP); >+ >+ /* If this node is independent of its siblings, >+ that one of its dependent children failed >+ does not matter: its dependent children must >+ also be independent of this node's siblings. 
*/ >+ if (node->rn_flags & RF_INDEPENDENT) >+ return FL_RECOVERABLE; >+ >+ return FL_FAILURE; >+ } > } > > /* Stop should occur after children have stopped */ >@@ -1339,7 +1239,7 @@ > > if (rv != 0) { > node->rn_state = RES_FAILED; >- return rv; >+ return FL_FAILURE; > } > > if (node->rn_state != RES_STOPPED) { >@@ -1378,24 +1278,31 @@ > char *type, void * __attribute__((unused))ret, int realop) > { > resource_node_t *node; >- int count = 0, rv; >+ int count = 0, rv = 0; > > if (realop == RS_STOP) { > list_for_rev(tree, node, count) { >- rv = _res_op_internal(tree, first, type, ret, realop, >- node); >- if (rv != 0) >- return rv; >+ rv |= _res_op_internal(tree, first, type, ret, realop, >+ node); > } > } else { > list_for(tree, node, count) { >- rv = _res_op_internal(tree, first, type, ret, realop, >- node); >- if (rv != 0) >+ rv |= _res_op_internal(tree, first, type, ret, realop, >+ node); >+ >+ /* If we hit a problem during a 'status' op in an >+ independent subtree, rv will have the >+ FL_RECOVERABLE bit set, but not FL_FAILURE. 
>+ If we ever hit FL_FAILURE during a status >+ operation, we're *DONE* - even if the subtree >+ is flagged w/ indy-subtree */ >+ >+ if (rv & FL_FAILURE) > return rv; > } > } >- return 0; >+ >+ return rv; > } > > /** >@@ -1464,7 +1371,30 @@ > int > res_status(resource_node_t **tree, resource_t *res, void *ret) > { >- return _res_op(tree, res, NULL, ret, RS_STATUS); >+ int rv; >+ rv = _res_op(tree, res, NULL, ret, RS_STATUS); >+ >+ if (rv & FL_FAILURE) >+ return rv; >+ >+ clulog(LOG_WARNING, "Some independent resources in %s:%s failed; " >+ "Attempting inline recovery\n", >+ res->r_rule->rr_type, res->r_attrs->ra_value); >+ >+ rv = res_condstop(tree, res, ret); >+ if (rv & FL_FAILURE) >+ goto out_fail; >+ rv = res_condstart(tree, res, ret); >+ if (rv & FL_FAILURE) >+ goto out_fail; >+ >+ clulog(LOG_NOTICE, "Inline recovery of %s:%s successful\n", >+ res->r_rule->rr_type, res->r_attrs->ra_value); >+ return 0; >+out_fail: >+ clulog(LOG_WARNING, "Inline recovery of %s:%s failed\n", >+ res->r_rule->rr_type, res->r_attrs->ra_value); >+ return 1; > } > > >Index: src/resources/script.sh >=================================================================== >RCS file: /cvs/cluster/cluster/rgmanager/src/resources/script.sh,v >retrieving revision 1.8 >diff -u -r1.8 script.sh >--- src/resources/script.sh 18 Aug 2006 15:26:23 -0000 1.8 >+++ src/resources/script.sh 31 May 2007 18:56:31 -0000 >@@ -115,5 +115,5 @@ > declare -i rv=$? > if [ $rv -ne 0 ]; then > ocf_log err "script:$OCF_RESKEY_name: $1 of $OCF_RESKEY_file failed (returned $rv)" >- return $OCF_ERR_GENERIC >+ exit $OCF_ERR_GENERIC > fi
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Diff
View Attachment As Raw
Actions:
View
|
Diff
Attachments on
bug 229650
:
148657
|
155385
|
155386
|
155387
|
155388
|
155389
| 155835 |
156923
|
181341
|
205561
|
205861