/*
 * The CERN Tape Archive (CTA) project
 * Copyright (C) 2015  CERN
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

19
#include "GarbageCollector.hpp"
#include "AgentReference.hpp"
#include "RootEntry.hpp"
#include <algorithm>
#include <memory>
23
24
25

namespace cta { namespace objectstore {

26
27
/**
 * Constructs a garbage collector working on the given object store.
 *
 * The agent register's address is read from the root entry, after which the
 * agent register itself is fetched under a shared lock.
 *
 * @param os             object store backend; stored as m_objectStore and used
 *                       to construct the agent register.
 * @param agentReference reference to this garbage collector's own agent.
 * @param catalogue      catalogue, stored for use during garbage collection.
 */
GarbageCollector::GarbageCollector(Backend & os, AgentReference & agentReference, catalogue::Catalogue & catalogue):
  m_objectStore(os),
  m_catalogue(catalogue),
  m_ourAgentReference(agentReference),
  m_agentRegister(os) {
  // Read the root entry under a shared lock to find out where the agent register lives.
  RootEntry rootEntry(m_objectStore);
  ScopedSharedLock rootEntryLock(rootEntry);
  rootEntry.fetch();
  m_agentRegister.setAddress(rootEntry.getAgentRegisterAddress());
  // The root entry lock is dropped before the agent register is locked.
  rootEntryLock.release();
  // Load the agent register; the shared lock is held until the constructor returns.
  ScopedSharedLock agentRegisterLock(m_agentRegister);
  m_agentRegister.fetch();
}
36

37
38
39
40
/**
 * Runs one complete garbage collection cycle.
 *
 * The three steps are executed in a fixed order: stop watching agents that
 * left the register, start watching newly listed agents, then check the
 * heartbeats of every watched agent.
 *
 * @param lc log context passed on to each step.
 */
void GarbageCollector::runOnePass(log::LogContext & lc) {
  // Step 1: forget the agents that are not listed in the register anymore.
  trimGoneTargets(lc);
  // Step 2: pick up the agents we are not watching yet.
  aquireTargets(lc);
  // Step 3: check the watched agents and clean up after the dead ones.
  checkHeartbeats(lc);
}
42
  
43
/**
 * Stops watching the agents that are no longer listed in the agent register.
 *
 * The register is re-read without locking, then every watched agent whose
 * address is absent from the register's agent list has its watchdog deleted
 * and its entry removed from m_watchedAgents. One INFO line is logged per
 * removed agent.
 *
 * @param lc log context used for the per-agent log lines.
 */
void GarbageCollector::trimGoneTargets(log::LogContext & lc) {
  m_agentRegister.fetchNoLock();
  std::list<std::string> registeredAgents = m_agentRegister.getAgents();
  auto watched = m_watchedAgents.begin();
  while (watched != m_watchedAgents.end()) {
    const bool stillRegistered =
      std::find(registeredAgents.begin(), registeredAgents.end(), watched->first) != registeredAgents.end();
    if (stillRegistered) {
      // Nothing to do for this one, move on.
      ++watched;
      continue;
    }
    // The agent vanished from the register: we simply stop looking at it.
    delete watched->second;
    log::ScopedParamContainer params(lc);
    params.add("agentAddress", watched->first);
    // erase() hands back the iterator following the removed element.
    watched = m_watchedAgents.erase(watched);
    lc.log(log::INFO, "In GarbageCollector::trimGoneTargets(): removed now gone agent.");
  }
}
62

63
/**
 * Starts watching every untracked agent we are not watching yet.
 *
 * The agent register is re-read without locking and its list of untracked
 * agents is walked. For each candidate that is neither ourselves nor already
 * present in m_watchedAgents, the agent object is fetched (without lock), an
 * INFO line is logged, and a watchdog configured with the agent's timeout is
 * added to m_watchedAgents.
 *
 * No ownership check is made here: cleanupDeadAgent() verifies that the agent
 * is still owned by the agent register before acting on it.
 *
 * @param lc log context used for the per-agent log lines.
 * @throws rethrows whatever the agent fetch threw when the agent object still
 *         exists in the object store; a failure on an agent that is gone is
 *         silently skipped.
 */
void GarbageCollector::aquireTargets(log::LogContext & lc) {
  m_agentRegister.fetchNoLock();
  // We will now watch all agents we do not know about yet.
  std::list<std::string> candidatesList = m_agentRegister.getUntrackedAgents();
  for (auto &c: candidatesList) {
    // We don't monitor ourselves.
    if (c == m_ourAgentReference.getAgentAddress()) continue;
    // Skip agents we already watch. Looking at the live map (instead of a
    // snapshot taken before the loop) also protects against a candidate being
    // listed twice, which would otherwise overwrite and leak a watchdog.
    if (m_watchedAgents.count(c)) continue;
    // So we have a candidate we might want to monitor.
    // First, check that the agent entry can be read.
    Agent ag(c, m_objectStore);
    try {
      ag.fetchNoLock();
    } catch (...) {
      // The agent could simply be gone... (If not, let the complaint go through).
      if (m_objectStore.exists(c)) throw;
      continue;
    }
    log::ScopedParamContainer params(lc);
    params.add("agentAddress", ag.getAddressIfSet())
          .add("gcAgentAddress", m_ourAgentReference.getAgentAddress());
    lc.log(log::INFO, "In GarbageCollector::aquireTargets(): started tracking an untracked agent");
    // Agent is to be tracked, let's track it.
    double timeout=ag.getTimeout();
    // Build and configure the watchdog completely before touching the map, so
    // that a failure here can neither leak it nor leave a null entry behind.
    std::unique_ptr<AgentWatchdog> watchdog(new AgentWatchdog(c, m_objectStore));
    watchdog->setTimeout(timeout);
    // Create the map slot first: if the insertion throws, the unique_ptr still
    // owns the watchdog. The pointer assignment itself cannot throw.
    AgentWatchdog *& slot = m_watchedAgents[c];
    slot = watchdog.release();
  }
}
 
101
/**
 * Checks every watched agent and cleans up after the ones found dead.
 *
 * For each entry of m_watchedAgents the watchdog's checkAlive() is called.
 * When it reports the agent as not alive, cleanupDeadAgent() is run, the
 * watchdog is deleted and the entry is removed from the map.
 *
 * @param lc log context passed on to cleanupDeadAgent().
 * @throws cta::exception::Exception rethrown when handling an agent failed
 *         while the watchdog's checkExists() still reports it as present.
 */
void GarbageCollector::checkHeartbeats(log::LogContext & lc) {
  // We can still fail on many steps, hence the per-agent try block.
  auto watched = m_watchedAgents.begin();
  while (watched != m_watchedAgents.end()) {
    try {
      if (watched->second->checkAlive()) {
        // Healthy agent: leave it alone.
        ++watched;
        continue;
      }
      // Dead agent: clean it up and drop every reference we hold to it.
      cleanupDeadAgent(watched->first, lc);
      delete watched->second;
      watched = m_watchedAgents.erase(watched);
    } catch (cta::exception::Exception &) {
      // We really have a problem if we failed to handle an agent that is still present.
      if (watched->second->checkExists()) {
        throw;
      }
      // Otherwise the agent just disappeared at the wrong moment.
      // It will be trimmed from the list on the next pass.
      ++watched;
    }
  }
}

Eric Cano's avatar
Eric Cano committed
127
void GarbageCollector::cleanupDeadAgent(const std::string & address, log::LogContext & lc) {
128
129
130
131
132
  // We detected a dead agent. Try and take ownership of it. It could already be owned
  // by another garbage collector.
  // To minimize locking, take a lock on the agent and check its ownership first.
  // We do not need to be defensive about exception here as calling function will
  // deal with them.
Eric Cano's avatar
Eric Cano committed
133
134
135
136
137
  Agent agent(address, m_objectStore);
  ScopedExclusiveLock agLock(agent);
  agent.fetch();
  log::ScopedParamContainer params(lc);
  params.add("agentAddress", agent.getAddressIfSet())
138
139
140
141
142
143
        .add("gcAgentAddress", m_ourAgentReference.getAgentAddress());
  if (agent.getOwner() != m_agentRegister.getAddressIfSet()) {
    params.add("agentOwner", agent.getOwner());
    lc.log(log::INFO, "In GarbageCollector::cleanupDeadAgent(): skipping agent which is not owned by agent register anymore.");
    // The agent will be removed from our ownership by the calling function: we're done.
    return;
Eric Cano's avatar
Eric Cano committed
144
  }
145
146
147
148
149
150
151
152
153
154
  // Aquire ownership of the agent.
  m_ourAgentReference.addToOwnership(address,m_objectStore);
  agent.setOwner(m_ourAgentReference.getAgentAddress());
  agent.commit();
  // Update the register
  ScopedExclusiveLock arl(m_agentRegister);
  m_agentRegister.fetch();
  m_agentRegister.trackAgent(address);
  m_agentRegister.commit();
  arl.release();
Eric Cano's avatar
Eric Cano committed
155
156
157
158
159
160
161
162
163
164
165
166
167
168
  lc.log(log::INFO, "In GarbageCollector::cleanupDeadAgent(): will cleanup dead agent.");
  // Return all objects owned by the agent to their respective backup owners
  auto ownedObjects = agent.getOwnershipList();
  for (auto obj = ownedObjects.begin(); obj!= ownedObjects.end(); obj++) {
   // Find the object
   GenericObject go(*obj, m_objectStore);
   log::ScopedParamContainer params2(lc);
   params2.add("objectAddress", go.getAddressIfSet());
   // If the object does not exist, we're done.
   if (go.exists()) {
     ScopedExclusiveLock goLock(go);
     go.fetch();
     // Call GenericOpbject's garbage collect method, which in turn will
     // delegate to the object type's garbage collector.
169
     go.garbageCollectDispatcher(goLock, address, m_ourAgentReference, lc, m_catalogue);
Eric Cano's avatar
Eric Cano committed
170
171
172
     lc.log(log::INFO, "In GarbageCollector::cleanupDeadAgent(): garbage collected owned object.");
   } else {
     lc.log(log::INFO, "In GarbageCollector::cleanupDeadAgent(): skipping garbage collection of now gone object.");
173
   }
Eric Cano's avatar
Eric Cano committed
174
175
176
177
178
   // In all cases, relinquish ownership for this object
   agent.removeFromOwnership(*obj);
   agent.commit();
  }
  // We now processed all the owned objects. We can delete the agent's entry
179
  agent.removeAndUnregisterSelf(lc);
Eric Cano's avatar
Eric Cano committed
180
  lc.log(log::INFO, "In GarbageCollector::cleanupDeadAgent(): agent entry removed.");
181
182
  // We can remove the agent from our own ownership.
  m_ourAgentReference.removeFromOwnership(address, m_objectStore);
Eric Cano's avatar
Eric Cano committed
183
184
}

185
}}