Skip to content
Snippets Groups Projects
Commit 6e5503ab authored by Eric Cano's avatar Eric Cano
Browse files

Added flag for immediate garbage collection of agents.

The process can now indicate the agent structure can be garbage collected, saving a wait for a timeout. This
is typically hapenning when archive jobs need requeueing after a session hits the end of the tape.
parent af569d75
Branches
Tags
No related merge requests found
...@@ -45,6 +45,7 @@ void cta::objectstore::Agent::initialize() { ...@@ -45,6 +45,7 @@ void cta::objectstore::Agent::initialize() {
m_payload.set_timeout_us(120*1000*1000); m_payload.set_timeout_us(120*1000*1000);
m_payload.set_description(""); m_payload.set_description("");
m_payload.set_being_garbage_collected(false); m_payload.set_being_garbage_collected(false);
m_payload.set_gc_needed(false);
m_payloadInterpreted = true; m_payloadInterpreted = true;
} }
...@@ -149,6 +150,17 @@ void cta::objectstore::Agent::setBeingGarbageCollected() { ...@@ -149,6 +150,17 @@ void cta::objectstore::Agent::setBeingGarbageCollected() {
m_payload.set_being_garbage_collected(true); m_payload.set_being_garbage_collected(true);
} }
void cta::objectstore::Agent::setNeedsGarbageCollection() {
checkPayloadWritable();
m_payload.set_gc_needed(true);
}
bool cta::objectstore::Agent::needsGarbageCollection() {
checkPayloadReadable();
if (!m_payload.has_gc_needed()) return false;
return m_payload.gc_needed();
}
void cta::objectstore::Agent::garbageCollect(const std::string& presumedOwner, AgentReference & agentReference, log::LogContext & lc, void cta::objectstore::Agent::garbageCollect(const std::string& presumedOwner, AgentReference & agentReference, log::LogContext & lc,
cta::catalogue::Catalogue & catalogue) { cta::catalogue::Catalogue & catalogue) {
checkPayloadWritable(); checkPayloadWritable();
......
...@@ -62,6 +62,10 @@ public: ...@@ -62,6 +62,10 @@ public:
bool isBeingGarbageCollected(); bool isBeingGarbageCollected();
void setNeedsGarbageCollection();
bool needsGarbageCollection();
void garbageCollect(const std::string &presumedOwner, AgentReference & agentReference, log::LogContext & lc, void garbageCollect(const std::string &presumedOwner, AgentReference & agentReference, log::LogContext & lc,
cta::catalogue::Catalogue & catalogue) override; cta::catalogue::Catalogue & catalogue) override;
......
...@@ -25,25 +25,28 @@ namespace cta { namespace objectstore { ...@@ -25,25 +25,28 @@ namespace cta { namespace objectstore {
class AgentWatchdog { class AgentWatchdog {
public: public:
AgentWatchdog(const std::string & name, Backend & os): m_agent(name, os), AgentWatchdog(const std::string & name, Backend & os): m_agent(name, os),
m_heartbeatCounter(readHeartbeat()) { m_heartbeatCounter(readGCData().heartbeat) {
m_agent.fetchNoLock(); m_agent.fetchNoLock();
m_timeout = m_agent.getTimeout(); m_timeout = m_agent.getTimeout();
} }
bool checkAlive() { bool checkAlive() {
uint64_t newHeartBeatCount; struct gcData newGCData;
try { try {
newHeartBeatCount = readHeartbeat(); newGCData = readGCData();
} catch (Backend::NoSuchObject &) { } catch (Backend::NoSuchObject &) {
// The agent could be gone. This is not an error. Mark it as alive, // The agent could be gone. This is not an error. Mark it as alive,
// and will be trimmed later. // and will be trimmed later.
return true; return true;
} }
auto timer = m_timer.secs(); auto timer = m_timer.secs();
if (newHeartBeatCount == m_heartbeatCounter && timer > m_timeout) // If GC is required, it's easy...
if (newGCData.needsGC) return false;
// If heartbeat has not moved for more than the timeout, we declare the agent dead.
if (newGCData.heartbeat == m_heartbeatCounter && timer > m_timeout)
return false; return false;
if (newHeartBeatCount != m_heartbeatCounter) { if (newGCData.heartbeat != m_heartbeatCounter) {
m_heartbeatCounter = newHeartBeatCount; m_heartbeatCounter = newGCData.heartbeat;
m_timer.reset(); m_timer.reset();
} }
return true; return true;
...@@ -51,7 +54,9 @@ public: ...@@ -51,7 +54,9 @@ public:
std::list<log::Param> getDeadAgentDetails() { std::list<log::Param> getDeadAgentDetails() {
std::list<log::Param> ret; std::list<log::Param> ret;
ret.push_back(log::Param("currentHeartbeat", readHeartbeat())); auto gcData = readGCData();
ret.push_back(log::Param("currentHeartbeat", gcData.heartbeat));
ret.push_back(log::Param("GCRequested", gcData.needsGC?"true":"false"));
ret.push_back(log::Param("timeout", m_timeout)); ret.push_back(log::Param("timeout", m_timeout));
ret.push_back(log::Param("timer", m_timer.secs())); ret.push_back(log::Param("timer", m_timer.secs()));
ret.push_back(log::Param("heartbeatAtTimerStart", m_heartbeatCounter)); ret.push_back(log::Param("heartbeatAtTimerStart", m_heartbeatCounter));
...@@ -72,9 +77,16 @@ private: ...@@ -72,9 +77,16 @@ private:
uint64_t m_heartbeatCounter; uint64_t m_heartbeatCounter;
double m_timeout; double m_timeout;
uint64_t readHeartbeat() { struct gcData {
uint64_t heartbeat;
bool needsGC;
};
struct gcData readGCData() {
m_agent.fetchNoLock(); m_agent.fetchNoLock();
return m_agent.getHeartbeatCount(); struct gcData ret;
ret.heartbeat = m_agent.getHeartbeatCount();
ret.needsGC = m_agent.needsGarbageCollection();
return ret;
} }
}; };
......
...@@ -66,6 +66,8 @@ BackendPopulator::~BackendPopulator() throw() { ...@@ -66,6 +66,8 @@ BackendPopulator::~BackendPopulator() throw() {
agent.fetch(); agent.fetch();
if (m_leaveNonEmptyAgentBehind && !agent.isEmpty()) { if (m_leaveNonEmptyAgentBehind && !agent.isEmpty()) {
cta::log::ScopedParamContainer params(m_lc); cta::log::ScopedParamContainer params(m_lc);
agent.setNeedsGarbageCollection();
agent.commit();
params.add("agentObject", agent.getAddressIfSet()) params.add("agentObject", agent.getAddressIfSet())
.add("ownedObjectCount", agent.getOwnershipList().size()); .add("ownedObjectCount", agent.getOwnershipList().size());
m_lc.log(log::WARNING, "In BackendPopulator::~BackendPopulator(): not deleting non-empty agent object, left for garbage collection."); m_lc.log(log::WARNING, "In BackendPopulator::~BackendPopulator(): not deleting non-empty agent object, left for garbage collection.");
......
...@@ -157,6 +157,7 @@ message Agent { ...@@ -157,6 +157,7 @@ message Agent {
required uint64 timeout_us = 2002; required uint64 timeout_us = 2002;
repeated string ownedobjects = 2003; repeated string ownedobjects = 2003;
optional bool being_garbage_collected = 2004 [default = false]; optional bool being_garbage_collected = 2004 [default = false];
optional bool gc_needed = 2005 [default = false];
} }
message AgentRegister { message AgentRegister {
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment