Login
[x]
Log in using an account from:
Fedora Account System
Red Hat Associate
Red Hat Customer
Or log in using a Red Hat Bugzilla account
Forgot Password
Login:
Hide Forgot
Create an Account
Red Hat Bugzilla – Attachment 870181 Details for
Bug 987628
Agent can block indefinitely at startup if server has an issue
[?]
New
Simple Search
Advanced Search
My Links
Browse
Requests
Reports
Current State
Search
Tabular reports
Graphical reports
Duplicates
Other Reports
User Changes
Plotly Reports
Bug Status
Bug Severity
Non-Defaults
|
Product Dashboard
Help
Page Help!
Bug Writing Guidelines
What's new
Browser Support Policy
5.0.4.rh83 Release notes
FAQ
Guides index
User guide
Web Services
Contact
Legal
This site requires JavaScript to be enabled to function correctly; please enable it.
[patch]
Fixes the stuck at startup issue
0001-BZ-987628-Agent-can-block-indefinitely-at-startup.patch (text/plain), 7.15 KB, created by
Elias Ross
on 2014-03-04 00:13:42 UTC
(
hide
)
Description:
Fixes the stuck at startup issue
Filename:
MIME Type:
Creator:
Elias Ross
Created:
2014-03-04 00:13:42 UTC
Size:
7.15 KB
patch
obsolete
>From 195c85d5430d8480de361ed3468200132d1081b4 Mon Sep 17 00:00:00 2001 >From: Elias Ross <elias_ross@apple.com> >Date: Mon, 17 Feb 2014 11:30:44 -0800 >Subject: [PATCH 1/2] BZ 987628 - Agent can block indefinitely at startup > >It appears the agent doesn't care that registration fails. The agent will >simply retry 5 times every 10 seconds, then the agent will block forever >waiting for the server to ping it, which won't happen. > >This change basically forces a complete shutdown after registration fails and >displays an error as such. > >There are a couple of cleanup things done for concurrency reasons as well. >--- > .../java/org/rhq/enterprise/agent/AgentMain.java | 38 ++++++++++++---------- > .../rhq/enterprise/agent/VMHealthCheckTest.java | 3 +- > 2 files changed, 22 insertions(+), 19 deletions(-) > >diff --git a/modules/enterprise/agent/src/main/java/org/rhq/enterprise/agent/AgentMain.java b/modules/enterprise/agent/src/main/java/org/rhq/enterprise/agent/AgentMain.java >index 33d2cc8..76ca12c 100644 >--- a/modules/enterprise/agent/src/main/java/org/rhq/enterprise/agent/AgentMain.java >+++ b/modules/enterprise/agent/src/main/java/org/rhq/enterprise/agent/AgentMain.java >@@ -53,6 +53,7 @@ > import java.util.concurrent.ScheduledThreadPoolExecutor; > import java.util.concurrent.TimeUnit; > import java.util.concurrent.atomic.AtomicBoolean; >+import java.util.concurrent.atomic.AtomicReference; > import java.util.concurrent.locks.ReadWriteLock; > import java.util.concurrent.locks.ReentrantReadWriteLock; > import java.util.prefs.BackingStoreException; >@@ -319,12 +320,12 @@ > * itself with the server. This is an array because the array itself will be used for its lock to synchronize access > * to the thread. > */ >- private Thread[] m_registrationThread = new Thread[1]; >+ private final AtomicReference<Thread> m_registrationThread = new AtomicReference<Thread>(); > > /** > * Will be non-<code>null</code> if this agent has successfully registered with the server. 
> */ >- private AgentRegistrationResults m_registration; >+ private volatile AgentRegistrationResults m_registration; > > /** > * This is the management MBean responsible for managing and monitoring this agent. This is the object the agent >@@ -345,7 +346,7 @@ > * allowing one failover attempt to happen at any one time regardless of the number of > * concurrent messages/failovers requested. > */ >- private long[] m_lastFailoverTime = new long[] { 0L }; >+ private final long[] m_lastFailoverTime = new long[] { 0L }; > > /** > * Thread used to try to maintain connectivity to the primary server as much as possible. >@@ -355,7 +356,7 @@ > /** > * Object that remembers when the last connect agent message was sent and to which server. > */ >- private LastSentConnectAgent m_lastSentConnectAgent = new LastSentConnectAgent(); >+ private final LastSentConnectAgent m_lastSentConnectAgent = new LastSentConnectAgent(); > > /** > * This is the number of milliseconds this agent clock differs from its server's clock. 
>@@ -448,6 +449,10 @@ public static void main(String[] args) { > agent.getOut().println(MSG.getMsg(AgentI18NResourceKeys.AGENT_START_FAILURE)); > anse.printStackTrace(agent.getOut()); > retries = MAX_RETRIES; // this is unrecoverable, we need this main thread to exit *now* >+ } catch (AgentRegistrationException e) { >+ LOG.fatal(e, AgentI18NResourceKeys.AGENT_START_FAILURE); >+ e.printStackTrace(agent.getOut()); >+ retries = MAX_RETRIES; > } catch (Exception e) { > LOG.fatal(e, AgentI18NResourceKeys.AGENT_START_FAILURE); > >@@ -689,8 +694,11 @@ public void start() throws Exception { > BootstrapLatchCommandListener latch = new BootstrapLatchCommandListener(); > startCommServices(latch); // note that we start the comm services before we start the plugin container > startManagementServices(); // we start our metric collectors before plugin container so the agent plugin can work >- prepareStartupWorkRequiringServer(); >+ boolean mustRegister = prepareStartupWorkRequiringServer(); > waitForServer(m_configuration.getWaitForServerAtStartupMsecs()); >+ if (mustRegister && !isRegistered()) { >+ throw new AgentRegistrationException(MSG.getMsg(AgentI18NResourceKeys.AGENT_CANNOT_REGISTER)); >+ } > > if (!m_configuration.doNotStartPluginContainerAtStartup()) { > // block indefinitely - we cannot continue until we are registered, we have plugins and the PC starts >@@ -1560,24 +1568,18 @@ public void run() { > } > }; > >- // another paraniod synchronization - just in case multiple threads attempt to concurrently register >+ // just in case multiple threads attempt to concurrently register > // this agent, this assures that only one registration thread is running - any old thread that > // may still be running will be interrupted (which will eventually cause it to die). Its > // OK if more than one registration command is sent via the task, that is concurrent-safe. > // This just ensures that only one registration thread continues to run. 
> >- Thread thread; >- >- synchronized (m_registrationThread) { >- thread = m_registrationThread[0]; >- if (thread != null) { >- thread.interrupt(); // make sure the old thread eventually dies >- } >- >- thread = new Thread(task, "RHQ Agent Registration Thread"); >- thread.setDaemon(true); >- m_registrationThread[0] = thread; >- thread.start(); >+ Thread thread = new Thread(task, "RHQ Agent Registration Thread"); >+ thread.setDaemon(true); >+ thread.start(); >+ Thread old = m_registrationThread.getAndSet(thread); >+ if (old != null) { >+ old.interrupt(); > } > > if (wait > 0L) { >diff --git a/modules/enterprise/agent/src/test/java/org/rhq/enterprise/agent/VMHealthCheckTest.java b/modules/enterprise/agent/src/test/java/org/rhq/enterprise/agent/VMHealthCheckTest.java >index 2f60990..b09d850 100644 >--- a/modules/enterprise/agent/src/test/java/org/rhq/enterprise/agent/VMHealthCheckTest.java >+++ b/modules/enterprise/agent/src/test/java/org/rhq/enterprise/agent/VMHealthCheckTest.java >@@ -10,7 +10,7 @@ > @Test > public class VMHealthCheckTest { > >- private int interval = 1000; >+ private final int interval = 1000; > > private final Logger LOG = AgentI18NFactory.getLogger(getClass()); > >@@ -20,6 +20,7 @@ public void testIt() throws Exception { > p.put(AgentConfigurationConstants.VM_HEALTH_CHECK_INTERVAL_MSECS, "" + interval); > p.put(AgentConfigurationConstants.WAIT_FOR_SERVER_AT_STARTUP_MSECS, "100"); > p.put(AgentConfigurationConstants.DO_NOT_START_PLUGIN_CONTAINER_AT_STARTUP, "true"); >+ agent.getConfiguration().setAgentSecurityToken("foo"); > VMHealthCheckThread t = new VMHealthCheckThread(agent); > t.start(); > assert t.isOutOfMemory() == false: "memory should be good"; >-- >1.8.1.2 >
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Diff
View Attachment As Raw
Actions:
View
|
Diff
Attachments on
bug 987628
:
777434
| 870181 |
870182