Commit d88a0145 authored by Eric Cano's avatar Eric Cano
Browse files

Fixed garbage collector dying when an agent disappears at the wrong moment.

parent e8c3de80
......@@ -43,6 +43,10 @@ public:
return true;
}
/// Reports whether the agent held in m_agent still exists.
/// Simply forwards to m_agent.exists() and returns its answer.
/// @return the result of m_agent.exists().
bool checkExists() {
  const bool agentPresent = m_agent.exists();
  return agentPresent;
}
void setTimeout(double timeout) {
m_timeout = timeout;
}
......
......@@ -139,6 +139,7 @@ void GarbageCollector::checkHeartbeats(log::LogContext & lc) {
for (std::map<std::string, AgentWatchdog * >::iterator wa = m_watchedAgents.begin();
wa != m_watchedAgents.end();) {
// Get the heartbeat. Clean dead agents and remove references to them
try {
if (!wa->second->checkAlive()) {
cleanupDeadAgent(wa->first, lc);
Agent ourAgent(m_ourAgentReference.getAgentAddress(), m_objectStore);
......@@ -151,6 +152,15 @@ void GarbageCollector::checkHeartbeats(log::LogContext & lc) {
} else {
wa++;
}
} catch (cta::exception::Exception & ex) {
if (wa->second->checkExists()) {
// We really have a problem: we failed to check on an agent, that is still present.
throw;
} else {
// The agent simply disappeared at the wrong time. It will be trimmed from the list on the next pass.
wa++;
}
}
}
}
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment