Login
[x]
Log in using an account from:
Fedora Account System
Red Hat Associate
Red Hat Customer
Or log in using a Red Hat Bugzilla account
Forgot Password
Login:
Hide Forgot
Create an Account
Red Hat Bugzilla – Attachment 312662 Details for
Bug 435466
Rgmanager shows wrong service status in restricted failover domain
[?]
New
Simple Search
Advanced Search
My Links
Browse
Requests
Reports
Current State
Search
Tabular reports
Graphical reports
Duplicates
Other Reports
User Changes
Plotly Reports
Bug Status
Bug Severity
Non-Defaults
|
Product Dashboard
Help
Page Help!
Bug Writing Guidelines
What's new
Browser Support Policy
5.0.4.rh83 Release notes
FAQ
Guides index
User guide
Web Services
Contact
Legal
This site requires JavaScript to be enabled to function correctly; please enable it.
[patch]
Pass 1 patch
rdomain-crash-rhel5.patch (text/plain), 10.16 KB, created by
Lon Hohberger
on 2008-07-25 17:40:12 UTC
(
hide
)
Description:
Pass 1 patch
Filename:
MIME Type:
Creator:
Lon Hohberger
Created:
2008-07-25 17:40:12 UTC
Size:
10.16 KB
patch
obsolete
>diff -ur rgmanager-current/include/members.h rgmanager/include/members.h >--- rgmanager-current/include/members.h 2008-07-24 16:24:24.000000000 -0400 >+++ rgmanager/include/members.h 2008-07-24 16:45:49.000000000 -0400 >@@ -17,6 +17,7 @@ > void member_set_state(int nodeid, int state); > int memb_count(cluster_member_list_t *ml); > int member_online(int nodeid); >+int member_online_set(int **nodes, int *nodecount); > int memb_online(cluster_member_list_t *ml, int nodeid); > int memb_online_name(cluster_member_list_t *ml, char *name); > int memb_name_to_id(cluster_member_list_t *ml, char *name); >diff -ur rgmanager-current/include/reslist.h rgmanager/include/reslist.h >--- rgmanager-current/include/reslist.h 2008-07-24 16:24:46.000000000 -0400 >+++ rgmanager/include/reslist.h 2008-07-24 17:23:34.000000000 -0400 >@@ -202,7 +202,7 @@ > void print_domains(fod_t **domains); > int node_should_start(int nodeid, cluster_member_list_t *membership, > char *rg_name, fod_t **domains); >-int node_domain_set(fod_t *domain, int **ret, int *retlen); >+int node_domain_set(fod_t **domains, char *name, int **ret, int *retlen, int *flags); > int node_domain_set_safe(char *domainname, int **ret, int *retlen, int *flags); > > >diff -ur rgmanager-current/src/clulib/members.c rgmanager/src/clulib/members.c >--- rgmanager-current/src/clulib/members.c 2008-07-24 16:24:46.000000000 -0400 >+++ rgmanager/src/clulib/members.c 2008-07-25 10:41:18.000000000 -0400 >@@ -213,6 +213,35 @@ > } > > >+int >+member_online_set(int **nodes, int *nodecount) >+{ >+ int ret = 1, i; >+ >+ pthread_rwlock_rdlock(&memblock); >+ if (!membership) >+ goto out_unlock; >+ >+ *nodes = malloc(sizeof(int) * membership->cml_count); >+ if (!*nodes) >+ goto out_unlock; >+ >+ *nodecount = 0; >+ for (i = 0; i < membership->cml_count; i++) { >+ if (membership->cml_members[i].cn_member && >+ membership->cml_members[i].cn_nodeid != 0) { >+ (*nodes)[*nodecount] = membership->cml_members[i].cn_nodeid; >+ ++(*nodecount); >+ } >+ } 
>+ >+ ret = 0; >+out_unlock: >+ pthread_rwlock_unlock(&memblock); >+ return ret; >+} >+ >+ > void > member_set_state(int nodeid, int state) > { >diff -ur rgmanager-current/src/daemons/fo_domain.c rgmanager/src/daemons/fo_domain.c >--- rgmanager-current/src/daemons/fo_domain.c 2008-07-24 16:24:46.000000000 -0400 >+++ rgmanager/src/daemons/fo_domain.c 2008-07-24 17:24:33.000000000 -0400 >@@ -349,13 +349,24 @@ > > > int >-node_domain_set(fod_t *domain, int **ret, int *retlen) >+node_domain_set(fod_t **domains, char *name, int **ret, int *retlen, int *flags) > { > int x, i, j; > int *tmpset; > int ts_count; >- > fod_node_t *fodn; >+ fod_t *domain; >+ int rv = -1, found = 0; >+ >+ list_for(domains, domain, x) { >+ if (!strcasecmp(domain->fd_name, name)) { >+ found = 1; >+ break; >+ } >+ } // while (!list_done(&_domains, fod)); >+ >+ if (!found) >+ return -1; > > /* Count domain length */ > list_for(&domain->fd_nodes, fodn, x) { } >@@ -368,6 +379,8 @@ > if (!(*tmpset)) > return -1; > >+ *flags = domain->fd_flags; >+ > if (domain->fd_flags & FOD_ORDERED) { > for (i = 1; i <= 100; i++) { > >diff -ur rgmanager-current/src/daemons/groups.c rgmanager/src/daemons/groups.c >--- rgmanager-current/src/daemons/groups.c 2008-07-24 16:24:46.000000000 -0400 >+++ rgmanager/src/daemons/groups.c 2008-07-25 10:34:43.000000000 -0400 >@@ -58,6 +58,7 @@ > > void res_build_name(char *, size_t, resource_t *); > int group_migratory(char *groupname, int lock); >+int _group_property(char *groupname, char *property, char *ret, size_t len); > > > struct status_arg { >@@ -88,23 +89,9 @@ > int > node_domain_set_safe(char *domainname, int **ret, int *retlen, int *flags) > { >- fod_t *fod; >- int rv = -1, found = 0, x = 0; >- >+ int rv = 0; > pthread_rwlock_rdlock(&resource_lock); >- >- list_for(&_domains, fod, x) { >- if (!strcasecmp(fod->fd_name, domainname)) { >- found = 1; >- break; >- } >- } // while (!list_done(&_domains, fod)); >- >- if (found) { >- rv = node_domain_set(fod, ret, retlen); >- 
*flags = fod->fd_flags; >- } >- >+ rv = node_domain_set(&_domains, domainname, ret, retlen, flags); > pthread_rwlock_unlock(&resource_lock); > > return rv; >@@ -438,6 +425,47 @@ > } > > >+int >+check_rdomain_crash(char *svcName) >+{ >+ int *nodes = NULL, nodecount; >+ int *fd_nodes = NULL, fd_nodecount, fl; >+ int *isect = NULL, icount; >+ char fd_name[256]; >+ >+ if (_group_property(svcName, "domain", fd_name, sizeof(fd_name)) != 0) >+ goto out_free; >+ >+ if (node_domain_set(_domains, fd_name, &fd_nodes, >+ &fd_nodecount, &fl) != 0) >+ goto out_free; >+ >+ if (!(fl & FOD_RESTRICTED)) >+ goto out_free; >+ >+ if (s_intersection(fd_nodes, fd_nodecount, nodes, nodecount, >+ &isect, &icount) < 0) >+ goto out_free; >+ >+ if (icount == 0) { >+ clulog(LOG_NOTICE, "Marking %s as stopped: " >+ "Restricted domain unavailable\n", svcName); >+ rt_enqueue_request(svcName, RG_STOP, NULL, 0, 0, >+ 0, 0); >+ } >+ >+out_free: >+ if (fd_nodes) >+ free(fd_nodes); >+ if (nodes) >+ free(nodes); >+ if (isect) >+ free(isect); >+ >+ return 0; >+} >+ >+ > /** > Start or failback a resource group: if it's not running, start it. > If it is running and we're a better member to run it, then ask for >@@ -451,6 +479,7 @@ > cman_node_t *mp; > int autostart, exclusive; > struct dlm_lksb lockp; >+ int fod_ret; > > mp = memb_id_to_p(membership, my_id()); > assert(mp); >@@ -542,10 +571,13 @@ > * Start any stopped services, or started services > * that are owned by a down node. > */ >- if (node_should_start(mp->cn_nodeid, membership, svcName, &_domains) == >- FOD_BEST) >+ fod_ret = node_should_start(mp->cn_nodeid, membership, >+ svcName, &_domains); >+ if (fod_ret == FOD_BEST) > rt_enqueue_request(svcName, RG_START, NULL, 0, mp->cn_nodeid, > 0, 0); >+ else if (fod_ret == FOD_ILLEGAL) >+ check_rdomain_crash(svcName); > } > > >@@ -1060,15 +1092,13 @@ > @return 0 on success, -1 on failure. 
> */ > int >-group_property(char *groupname, char *property, char *ret, size_t len) >+_group_property(char *groupname, char *property, char *ret, size_t len) > { > resource_t *res = NULL; > int x = 0; > >- pthread_rwlock_rdlock(&resource_lock); > res = find_root_by_ref(&_resources, groupname); > if (!res) { >- pthread_rwlock_unlock(&resource_lock); > return -1; > } > >@@ -1076,15 +1106,24 @@ > if (strcasecmp(res->r_attrs[x].ra_name, property)) > continue; > strncpy(ret, res->r_attrs[x].ra_value, len); >- pthread_rwlock_unlock(&resource_lock); > return 0; > } >- pthread_rwlock_unlock(&resource_lock); > > return -1; > } > > >+int >+group_property(char *groupname, char *property, char *ret_val, size_t len) >+{ >+ int ret = -1; >+ pthread_rwlock_rdlock(&resource_lock); >+ ret = _group_property(groupname, property, ret_val, len); >+ pthread_rwlock_unlock(&resource_lock); >+ return ret; >+} >+ >+ > /** > Send the state of a resource group to a given file descriptor. > >diff -ur rgmanager-current/src/daemons/rg_state.c rgmanager/src/daemons/rg_state.c >--- rgmanager-current/src/daemons/rg_state.c 2008-07-24 16:24:46.000000000 -0400 >+++ rgmanager/src/daemons/rg_state.c 2008-07-24 17:12:44.000000000 -0400 >@@ -470,6 +470,8 @@ > * 2 = DO NOT stop service, return 0 (success) > * 3 = DO NOT stop service, return RG_EFORWARD > * 4 = DO NOT stop service, return RG_EAGAIN >+ * 5 = DO NOT stop service, mark stopped and return >+ * RG_SUCCESS (0) > */ > int > svc_advise_stop(rg_state_t *svcStatus, char *svcName, int req) >@@ -529,9 +531,10 @@ > > /* > Service is marked as running but node is down. >- Doesn't make much sense to stop it. 
>+ Doesn't make much sense to stop it - but we need >+ to mark it stopped > */ >- ret = 2; >+ ret = 5; > break; > > case RG_STATE_ERROR: >@@ -1278,6 +1281,16 @@ > clulog(LOG_DEBUG, "Unable to stop RG %s in %s state\n", > svcName, rg_state_str(svcStatus.rs_state)); > return RG_EFAIL; >+ case 5: >+ /* Mark stopped, but do not do anything */ >+ svcStatus.rs_last_owner = svcStatus.rs_owner; >+ svcStatus.rs_owner = 0; >+ svcStatus.rs_state = RG_STATE_STOPPED; >+ if (set_rg_state(svcName, &svcStatus) != 0) { >+ rg_unlock(&lockp); >+ return RG_EFAIL; >+ } >+ /* FALLTHROUGH */ > case 2: > rg_unlock(&lockp); > return RG_ESUCCESS; >diff -ur rgmanager-current/src/daemons/service_op.c rgmanager/src/daemons/service_op.c >--- rgmanager-current/src/daemons/service_op.c 2008-07-24 16:24:46.000000000 -0400 >+++ rgmanager/src/daemons/service_op.c 2008-07-24 17:13:02.000000000 -0400 >@@ -150,8 +150,17 @@ > > if (get_service_state_internal(svcName, &svcStatus) < 0) > return RG_EFAIL; >- if (svcStatus.rs_owner > 0) >- msgtarget = svcStatus.rs_owner; >+ if (svcStatus.rs_owner > 0) { >+ if (member_online(svcStatus.rs_owner)) { >+ msgtarget = svcStatus.rs_owner; >+ } else { >+ /* If the owner is not online, >+ mark the service as 'stopped' but >+ otherwise, do nothing. 
>+ */ >+ return svc_stop(svcName, RG_STOP); >+ } >+ } > > if (msg_open(MSG_CLUSTER, msgtarget, RG_PORT, &ctx, 2)< 0) { > clulog(LOG_ERR, >diff -ur rgmanager-current/src/daemons/slang_event.c rgmanager/src/daemons/slang_event.c >--- rgmanager-current/src/daemons/slang_event.c 2008-07-24 16:24:46.000000000 -0400 >+++ rgmanager/src/daemons/slang_event.c 2008-07-24 16:45:14.000000000 -0400 >@@ -595,24 +595,12 @@ > void > sl_nodes_online(void) > { >- int i, *nodes, nodecount = 0; >+ int x, *nodes = NULL, nodecount = 0; > >- cluster_member_list_t *membership = member_list(); >- if (!membership) >- return; >- nodes = malloc(sizeof(int) * membership->cml_count); >- if (!nodes) >+ x = member_online_set(&nodes, &nodecount); >+ if (x < 0 || !nodes || !nodecount) > return; > >- nodecount = 0; >- for (i = 0; i < membership->cml_count; i++) { >- if (membership->cml_members[i].cn_member && >- membership->cml_members[i].cn_nodeid != 0) { >- nodes[nodecount] = membership->cml_members[i].cn_nodeid; >- ++nodecount; >- } >- } >- free_member_list(membership); > push_int_array(nodes, nodecount); > free(nodes); > } >diff -ur rgmanager-current/src/resources/default_event_script.sl rgmanager/src/resources/default_event_script.sl >--- rgmanager-current/src/resources/default_event_script.sl 2008-07-24 16:24:46.000000000 -0400 >+++ rgmanager/src/resources/default_event_script.sl 2008-07-25 10:36:42.000000000 -0400 >@@ -31,7 +31,8 @@ > > len = length(node_list); > if (len == 0) { >- debug(service, " is not runnable"); >+ notice(service, " is not runnable - restricted domain offline"); >+ ()=service_stop(service); > return ERR_DOMAIN; > } >
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Diff
View Attachment As Raw
Actions:
View
|
Diff
Attachments on
bug 435466
: 312662