diff --git a/objectstore/Agent.cpp b/objectstore/Agent.cpp index 89cbe6154614afa3925aec59a431a8241b071c7d..4e6c3e91ef2905ba2af588bfa448e79245d13817 100644 --- a/objectstore/Agent.cpp +++ b/objectstore/Agent.cpp @@ -45,6 +45,7 @@ void cta::objectstore::Agent::initialize() { m_payload.set_timeout_us(120*1000*1000); m_payload.set_description(""); m_payload.set_being_garbage_collected(false); + m_payload.set_gc_needed(false); m_payloadInterpreted = true; } @@ -149,6 +150,17 @@ void cta::objectstore::Agent::setBeingGarbageCollected() { m_payload.set_being_garbage_collected(true); } +void cta::objectstore::Agent::setNeedsGarbageCollection() { + checkPayloadWritable(); + m_payload.set_gc_needed(true); +} + +bool cta::objectstore::Agent::needsGarbageCollection() { + checkPayloadReadable(); + if (!m_payload.has_gc_needed()) return false; + return m_payload.gc_needed(); +} + void cta::objectstore::Agent::garbageCollect(const std::string& presumedOwner, AgentReference & agentReference, log::LogContext & lc, cta::catalogue::Catalogue & catalogue) { checkPayloadWritable(); diff --git a/objectstore/Agent.hpp b/objectstore/Agent.hpp index da25916118e05e87baab7a875eb2f7a46239b94a..de371d1134ab34c6db3dafb780011164a185f627 100644 --- a/objectstore/Agent.hpp +++ b/objectstore/Agent.hpp @@ -62,6 +62,10 @@ public: bool isBeingGarbageCollected(); + void setNeedsGarbageCollection(); + + bool needsGarbageCollection(); + void garbageCollect(const std::string &presumedOwner, AgentReference & agentReference, log::LogContext & lc, cta::catalogue::Catalogue & catalogue) override; diff --git a/objectstore/AgentWatchdog.hpp b/objectstore/AgentWatchdog.hpp index 9f1a157a1913742a15996eeb6c34d488ea7a0a53..0ae9af672bb34e8dd8980078e158c48cd70b43ab 100644 --- a/objectstore/AgentWatchdog.hpp +++ b/objectstore/AgentWatchdog.hpp @@ -25,25 +25,28 @@ namespace cta { namespace objectstore { class AgentWatchdog { public: AgentWatchdog(const std::string & name, Backend & os): m_agent(name, os), - m_heartbeatCounter(readHeartbeat()) { + m_heartbeatCounter(readGCData().heartbeat) { m_agent.fetchNoLock(); m_timeout = m_agent.getTimeout(); } bool checkAlive() { - uint64_t newHeartBeatCount; - try { - newHeartBeatCount = readHeartbeat(); + struct gcData newGCData; + try { + newGCData = readGCData(); } catch (Backend::NoSuchObject &) { // The agent could be gone. This is not an error. Mark it as alive, // and will be trimmed later. return true; } auto timer = m_timer.secs(); - if (newHeartBeatCount == m_heartbeatCounter && timer > m_timeout) + // If GC is required, it's easy... + if (newGCData.needsGC) return false; + // If heartbeat has not moved for more than the timeout, we declare the agent dead. + if (newGCData.heartbeat == m_heartbeatCounter && timer > m_timeout) return false; - if (newHeartBeatCount != m_heartbeatCounter) { - m_heartbeatCounter = newHeartBeatCount; + if (newGCData.heartbeat != m_heartbeatCounter) { + m_heartbeatCounter = newGCData.heartbeat; m_timer.reset(); } return true; @@ -51,7 +54,9 @@ public: std::list<log::Param> getDeadAgentDetails() { std::list<log::Param> ret; - ret.push_back(log::Param("currentHeartbeat", readHeartbeat())); + auto gcData = readGCData(); + ret.push_back(log::Param("currentHeartbeat", gcData.heartbeat)); + ret.push_back(log::Param("GCRequested", gcData.needsGC?"true":"false")); ret.push_back(log::Param("timeout", m_timeout)); ret.push_back(log::Param("timer", m_timer.secs())); ret.push_back(log::Param("heartbeatAtTimerStart", m_heartbeatCounter)); @@ -72,9 +77,16 @@ private: uint64_t m_heartbeatCounter; double m_timeout; - uint64_t readHeartbeat() { + struct gcData { + uint64_t heartbeat; + bool needsGC; + }; + struct gcData readGCData() { m_agent.fetchNoLock(); - return m_agent.getHeartbeatCount(); + struct gcData ret; + ret.heartbeat = m_agent.getHeartbeatCount(); + ret.needsGC = m_agent.needsGarbageCollection(); + return ret; } }; diff --git a/objectstore/BackendPopulator.cpp b/objectstore/BackendPopulator.cpp index 3a7f71a1af683f77cd2ca404b42be1b19901d246..7c6d84ceb5e74c0ce857acfedf5dc881baefe769 100644 --- a/objectstore/BackendPopulator.cpp +++ b/objectstore/BackendPopulator.cpp @@ -66,6 +66,8 @@ BackendPopulator::~BackendPopulator() throw() { agent.fetch(); if (m_leaveNonEmptyAgentBehind && !agent.isEmpty()) { cta::log::ScopedParamContainer params(m_lc); + agent.setNeedsGarbageCollection(); + agent.commit(); params.add("agentObject", agent.getAddressIfSet()) .add("ownedObjectCount", agent.getOwnershipList().size()); m_lc.log(log::WARNING, "In BackendPopulator::~BackendPopulator(): not deleting non-empty agent object, left for garbage collection."); diff --git a/objectstore/cta.proto b/objectstore/cta.proto index 0ef553d93437f4172d93bf963f8d9d657166f178..c3797e3a5c0fdf2537be54df515dce3a26b675ad 100644 --- a/objectstore/cta.proto +++ b/objectstore/cta.proto @@ -157,6 +157,7 @@ message Agent { required uint64 timeout_us = 2002; repeated string ownedobjects = 2003; optional bool being_garbage_collected = 2004 [default = false]; + optional bool gc_needed = 2005 [default = false]; } message AgentRegister {