Login
[x]
Log in using an account from:
Fedora Account System
Red Hat Associate
Red Hat Customer
Or log in using a Red Hat Bugzilla account
Forgot Password
Login:
Hide Forgot
Create an Account
Red Hat Bugzilla – Attachment 145393 Details for
Bug 211238
Cluster node reboot debug option for clumanager
[?]
New
Simple Search
Advanced Search
My Links
Browse
Requests
Reports
Current State
Search
Tabular reports
Graphical reports
Duplicates
Other Reports
User Changes
Plotly Reports
Bug Status
Bug Severity
Non-Defaults
|
Product Dashboard
Help
Page Help!
Bug Writing Guidelines
What's new
Browser Support Policy
5.0.4.rh83 Release notes
FAQ
Guides index
User guide
Web Services
Contact
Legal
This site requires JavaScript to be enabled to function correctly, please enable it.
[patch]
Newer patch
clumanager-1.2.34-rebootdebug2.patch (text/plain), 8.89 KB, created by
Lon Hohberger
on 2007-01-11 21:13:46 UTC
(
hide
)
Description:
Newer patch
Filename:
MIME Type:
Creator:
Lon Hohberger
Created:
2007-01-11 21:13:46 UTC
Size:
8.89 KB
patch
obsolete
>Index: doc/man/cludb.8 >=================================================================== >RCS file: /cvs/devel/clumanager/doc/man/cludb.8,v >retrieving revision 1.12 >retrieving revision 1.13 >diff -u -r1.12 -r1.13 >--- doc/man/cludb.8 4 May 2006 20:56:25 -0000 1.12 >+++ doc/man/cludb.8 11 Jan 2007 18:57:30 -0000 1.13 >@@ -85,6 +85,18 @@ > .IP "cluster%logfacility" > This changes the logging facility the cluster uses. See > /usr/include/sys/syslog.h for a list of valid facilities. >+.IP "cluster%reboot_debug" >+In some cases, clumanager will reboot a node. Setting this flag to "yes" >+will cause clumanager to write out /tmp/clumanager-reboot-debug prior >+to rebooting the machine, which contains the source code file, line, and >+function in the clumanager source code where the reboot was requested. >+(yes/no; default=no). If you think clumanager is rebooting your machine >+without a reason, set this to "yes" and check for it on the next reboot. >+You can then either browse the source file, contact Red Hat Support, or >+post the relevant information to linux-cluster@redhat.com if you require >+more information. Note that this does NOT trap any reboot call initiated >+from outside of clumanager, including but not limited to watchdog-initiated >+reboots. > .IP "cluster%disk_error_action" > Action to take if access to the shared state information (raw devices) > returns errors (ignore/stop/halt/reboot; default=reboot). 
Ignore >Index: include/sharedstate.h >=================================================================== >RCS file: /cvs/devel/clumanager/include/sharedstate.h,v >retrieving revision 1.12 >retrieving revision 1.13 >diff -u -r1.12 -r1.13 >--- include/sharedstate.h 17 Dec 2003 16:43:56 -0000 1.12 >+++ include/sharedstate.h 11 Jan 2007 18:43:29 -0000 1.13 >@@ -202,4 +202,8 @@ > int shared_storage_deinit(void); > > >+void do_reboot(int how, const char *func, const char *file, int line); >+#define REBOOT(arg) do_reboot(arg, __FUNCTION__, __FILE__, __LINE__) >+ >+ > #endif /* sharedstate.h */ >Index: src/clulib/shared_services.c >=================================================================== >RCS file: /cvs/devel/clumanager/src/clulib/shared_services.c,v >retrieving revision 1.9 >retrieving revision 1.11 >diff -u -r1.9 -r1.11 >--- src/clulib/shared_services.c 16 Dec 2004 19:41:21 -0000 1.9 >+++ src/clulib/shared_services.c 11 Jan 2007 21:04:34 -0000 1.11 >@@ -21,7 +21,7 @@ > * Shared Service Block read/write functions. > */ > /* >- * $Id: shared_services.c,v 1.9 2004/12/16 19:41:21 lhh Exp $ >+ * $Id: shared_services.c,v 1.11 2007/01/11 21:04:34 lhh Exp $ > * > * author: Tim Burke <tburke at redhat.com> > * description: Interface to Service descriptions. >@@ -42,6 +42,7 @@ > #include <sys/wait.h> > #include <sys/time.h> > #include <sys/reboot.h> >+#include <sched.h> > #include <signal.h> > #include <errno.h> > #include <clu_lock.h> >@@ -73,7 +74,7 @@ > static int writeServiceBlock(SharedServiceBlock *svcblk); > void printServiceBlock(SharedServiceBlock *svcblk); > >-static const char *version __attribute__ ((unused)) = "$Id: shared_services.c,v 1.9 2004/12/16 19:41:21 lhh Exp $"; >+static const char *version __attribute__ ((unused)) = "$Id: shared_services.c,v 1.11 2007/01/11 21:04:34 lhh Exp $"; > /* > * .............Configurable Parameters................... 
> * >@@ -104,6 +105,51 @@ > } > > >+void >+do_reboot(int how, const char *func, const char *file, int line) >+{ >+ char *val = NULL; >+ int fd; >+ char buf[128]; >+ time_t now = time(NULL); >+ struct sched_param s; >+ >+ CFG_Get("cluster%reboot_debug", NULL, &val); >+ >+ if (!val || !strlen(val) || val[0]=='n' || >+ ((val[0] != 'y' && atoi(val)==0))) { >+ reboot(how); >+ return; >+ } >+ >+ /* Pre-empt EVERYTHING */ >+ memset(&s,0, sizeof(s)); >+ s.sched_priority = 100; >+ sched_setscheduler(0, SCHED_RR, &s); >+ >+ fd = open("/tmp/clumanager-reboot-debug", >+ O_RDWR|O_SYNC|O_TRUNC|O_CREAT); >+ if (fd < 0) { >+ printf("Failed to create file\n"); >+ } else { >+ snprintf(buf, sizeof(buf), >+ "%s-> reboot(%d) @ %s:%d in function %s()\n", >+ ctime(&now), how, file, line, func); >+ >+ write(fd, buf, strlen(buf)); >+ fdatasync(fd); >+ fsync(fd); >+ close(fd); >+ sync(); sync(); sync(); >+ } >+ >+ reboot(how); >+ /* notreached */ >+ s.sched_priority = 0; >+ sched_setscheduler(0, SCHED_OTHER, &s); >+} >+ >+ > /** > * Take action on read/write error from shared state. 
> */ >@@ -124,7 +170,7 @@ > > if (!strcasecmp(val, "halt")) { > clulog(LOG_ALERT, "Shared State Error: HALTING"); >- reboot(RB_HALT_SYSTEM); >+ REBOOT(RB_HALT_SYSTEM); > return; > } > >@@ -137,7 +183,7 @@ > > reboot: > clulog(LOG_ALERT, "Shared State Error: REBOOTING"); >- reboot(RB_AUTOBOOT); >+ REBOOT(RB_AUTOBOOT); > } > > >Index: src/clulib/stonith_fence.c >=================================================================== >RCS file: /cvs/devel/clumanager/src/clulib/stonith_fence.c,v >retrieving revision 1.19 >retrieving revision 1.20 >diff -u -r1.19 -r1.20 >--- src/clulib/stonith_fence.c 11 Feb 2005 22:22:56 -0000 1.19 >+++ src/clulib/stonith_fence.c 11 Jan 2007 18:43:29 -0000 1.20 >@@ -343,8 +343,7 @@ > > if (node == clu_node_id) { > clulog(LOG_ALERT, "STONITH: Shooting self!\n"); >- sync(); >- reboot(RB_AUTOBOOT); >+ REBOOT(RB_AUTOBOOT); > } > > /* >Index: src/daemons/clumembd.c >=================================================================== >RCS file: /cvs/devel/clumanager/src/daemons/clumembd.c,v >retrieving revision 1.56 >retrieving revision 1.57 >diff -u -r1.56 -r1.57 >--- src/daemons/clumembd.c 22 Jun 2006 15:03:00 -0000 1.56 >+++ src/daemons/clumembd.c 11 Jan 2007 18:43:29 -0000 1.57 >@@ -20,7 +20,7 @@ > * Consensus/VF-based membership daemon for Red Hat Cluster Manager. 
> */ > /* >- * $Id: clumembd.c,v 1.56 2006/06/22 15:03:00 lhh Exp $ >+ * $Id: clumembd.c,v 1.57 2007/01/11 18:43:29 lhh Exp $ > * > * Author: Lon Hohberger <lhh at redhat.com> > * Brian Stevens <bstevens at redhat.com> >@@ -50,6 +50,7 @@ > #include <termios.h> > #include <sys/syslog.h> > #include <membership.h> >+#include <sharedstate.h> > #include <sched.h> > #include <pthread.h> > >@@ -1736,8 +1737,7 @@ > if (!debug && __cmp_tv(&maxtime, &diff) == 1) { > clulog(LOG_EMERG, "Failed to send a heartbeat within " > "failover time - REBOOTING\n"); >- sync(); >- reboot(RB_AUTOBOOT); >+ REBOOT(RB_AUTOBOOT); > } > > /* >@@ -1993,7 +1993,7 @@ > "a heartbeat within failover " > "time - REBOOTING\n"); > sync(); >- reboot(RB_AUTOBOOT); /* :( */ >+ REBOOT(RB_AUTOBOOT); /* :( */ > } > } else { > failed_sends = 0; >Index: src/daemons/cluquorumd.c >=================================================================== >RCS file: /cvs/devel/clumanager/src/daemons/cluquorumd.c,v >retrieving revision 1.57 >retrieving revision 1.58 >diff -u -r1.57 -r1.58 >--- src/daemons/cluquorumd.c 4 May 2006 20:56:25 -0000 1.57 >+++ src/daemons/cluquorumd.c 11 Jan 2007 18:43:29 -0000 1.58 >@@ -621,7 +621,7 @@ > if (!clean_cluster_transition) { > clulog(LOG_EMERG, "Quorum lost! 
" > "No STONITH devices - REBOOTING NOW"); >- reboot(RB_AUTOBOOT); >+ REBOOT(RB_AUTOBOOT); > exit(1); > } > >@@ -1321,7 +1321,7 @@ > clulog(LOG_EMERG, "Lost connection to membership: " > "REBOOTING NOW\n"); > sync(); sync(); sync(); >- reboot(RB_AUTOBOOT); >+ REBOOT(RB_AUTOBOOT); > #endif > } > clist_delete(fd); >Index: src/daemons/cluquorumd_disk.c >=================================================================== >RCS file: /cvs/devel/clumanager/src/daemons/cluquorumd_disk.c,v >retrieving revision 1.16 >retrieving revision 1.17 >diff -u -r1.16 -r1.17 >--- src/daemons/cluquorumd_disk.c 21 Sep 2004 18:50:39 -0000 1.16 >+++ src/daemons/cluquorumd_disk.c 11 Jan 2007 18:43:29 -0000 1.17 >@@ -27,6 +27,7 @@ > #include <xmlwrap.h> > #include <clulog.h> > #include <sys/reboot.h> >+#include <sharedstate.h> > #include <stonith.h> > #include <stonithapis.h> > #include <disk_tie.h> >@@ -396,7 +397,7 @@ > (status_block.ps_state == NODE_DOWN)) { > #ifdef OLD_BEHAVIOR > clulog(LOG_EMERG, "Disk-TB: Detected I/O Hang! 
Rebooting NOW!\n"); >- reboot(RB_AUTOBOOT); >+ REBOOT(RB_AUTOBOOT); > #else > clulog(LOG_WARNING, "Disk-TB: Detected I/O Hang!\n"); > #endif >Index: src/daemons/clusvcmgrd.c >=================================================================== >RCS file: /cvs/devel/clumanager/src/daemons/clusvcmgrd.c,v >retrieving revision 1.64 >retrieving revision 1.65 >diff -u -r1.64 -r1.65 >--- src/daemons/clusvcmgrd.c 22 Jun 2006 15:03:01 -0000 1.64 >+++ src/daemons/clusvcmgrd.c 11 Jan 2007 18:43:29 -0000 1.65 >@@ -25,7 +25,7 @@ > * > */ > >-/*static const char *version __attribute__ ((unused)) = "$Revision: 1.64 $";*/ >+/*static const char *version __attribute__ ((unused)) = "$Revision: 1.65 $";*/ > > #include <stdio.h> > #include <stdlib.h> >@@ -980,7 +980,7 @@ > "Failed to stop service %s uncleanly - REBOOTING\n", > svcName); > sleep(1); >- reboot(RB_AUTOBOOT); >+ REBOOT(RB_AUTOBOOT); > } > return SUCCESS; > } >@@ -2926,7 +2926,7 @@ > clulog(LOG_EMERG, "Couldn't reconnect " > "to the quorum daemon! " > "REBOOTING"); >- reboot(RB_AUTOBOOT); >+ REBOOT(RB_AUTOBOOT); > } > } > }
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Diff
View Attachment As Raw
Actions:
View
|
Diff
Attachments on
bug 211238
:
138735
| 145393