Commit 697ea45a authored by Eric Cano

Implemented first version of sharded archive queues.

Updated locking of ObjectOps in order to allow locking of sub-objects.
Made cta-objectstore-dump-object lock-free.
parent 97450998
This diff is collapsed.
......@@ -51,7 +51,14 @@ public:
// Commit with sanity checks (override from ObjectOps).
void commit();
private:
// Validates that all summaries are consistent with each other.
bool checkMapsAndShardsCoherency();
// Rebuild the summaries from the shards if an inconsistency is detected.
void rebuild();
public:
// Set/get tape pool
void setTapePool(const std::string & name);
std::string getTapePool();
......@@ -65,17 +72,25 @@ public:
const cta::common::dataStructures::MountPolicy policy;
time_t startTime;
};
void addJobsAndCommit(std::list<JobToAdd> & jobsToAdd);
/** Add the jobs to the queue.
 * The lock will be used to mark the shards as locked (the same lock covers
 * both the main object and the shards; there is no shared access).
 * As we potentially have to create new shard(s), we need access to the agent
 * reference (to generate a non-colliding object name).
 * We will also log the shard creation (hence the log context).
 */
void addJobsAndCommit(std::list<JobToAdd> & jobsToAdd, AgentReference & agentReference, log::LogContext & lc);
/// This version checks for the existence of each job in the queue before adding it,
/// and returns the count and total size of the jobs actually added (if any).
struct AdditionSummary {
uint64_t files = 0;
uint64_t bytes = 0;
};
AdditionSummary addJobsIfNecessaryAndCommit(std::list<JobToAdd> & jobsToAdd);
AdditionSummary addJobsIfNecessaryAndCommit(std::list<JobToAdd> & jobsToAdd,
AgentReference & agentReference, log::LogContext & lc);
struct JobsSummary {
uint64_t files;
uint64_t jobs;
uint64_t bytes;
time_t oldestJobStartTime;
uint64_t priority;
......@@ -84,7 +99,7 @@ public:
};
JobsSummary getJobsSummary();
void removeJobsAndCommit(const std::list<std::string> & requestsToRemove);
void removeJobsAndCommit(const std::list<std::string> & jobsToRemove);
struct JobDump {
uint64_t size;
std::string address;
......@@ -111,6 +126,13 @@ public:
cta::catalogue::Catalogue & catalogue) override;
std::string dump();
// The shard size. From experience, 100k is where we start to see a performance
// difference, but nothing prevents us from using a smaller size.
// Performance will be roughly flat until the queue size reaches the square of
// this limit (at which point updates of the top-level queue object start to
// take too much time).
// With the current value of 25k, performance should be roughly flat up to
// 25k^2 = 625M jobs.
static const uint64_t c_maxShardSize = 25000;
};
}}
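For reference, a typical call site for the new addJobsAndCommit() signature (mirroring the GarbageCollector and unit-test hunks further down) looks roughly like this; queueAddress, backend, jobDump, request, archiveFileId, fileSize, mountPolicy, agentReference and lc are assumed to come from the surrounding scheduler code:

cta::objectstore::ArchiveQueue aq(queueAddress, backend);
cta::objectstore::ScopedExclusiveLock aql(aq);
aq.fetch();
std::list<cta::objectstore::ArchiveQueue::JobToAdd> jta;
jta.push_back({jobDump, request.getAddressIfSet(), archiveFileId, fileSize,
    mountPolicy, ::time(nullptr)});
// The agent reference is only used if a new shard has to be created (to
// generate a non-colliding shard name); the log context is used to log it.
aq.addJobsAndCommit(jta, agentReference, lc);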
/*
* The CERN Tape Archive (CTA) project
* Copyright (C) 2015 CERN
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "ArchiveQueueShard.hpp"
namespace cta { namespace objectstore {
ArchiveQueueShard::ArchiveQueueShard(Backend& os):
ObjectOps<serializers::ArchiveQueueShard, serializers::ArchiveQueueShard_t>(os) { }
ArchiveQueueShard::ArchiveQueueShard(const std::string& address, Backend& os):
ObjectOps<serializers::ArchiveQueueShard, serializers::ArchiveQueueShard_t>(os, address) { }
void ArchiveQueueShard::rebuild() {
checkPayloadWritable();
uint64_t totalSize=0;
for (const auto &j: m_payload.archivejobs()) {
totalSize += j.size();
}
m_payload.set_archivejobstotalsize(totalSize);
}
void ArchiveQueueShard::garbageCollect(const std::string& presumedOwner, AgentReference& agentReference, log::LogContext& lc, cta::catalogue::Catalogue& catalogue) {
throw exception::Exception("In ArchiveQueueShard::garbageCollect(): garbage collection should not be necessary for this type of object.");
}
ArchiveQueue::CandidateJobList ArchiveQueueShard::getCandidateJobList(uint64_t maxBytes, uint64_t maxFiles, std::set<std::string> archiveRequestsToSkip) {
checkPayloadReadable();
ArchiveQueue::CandidateJobList ret;
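// Start from the shard totals and subtract as we iterate: the "remaining"
// counters end up reflecting what would stay in the shard after the
// candidates are removed.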
ret.remainingBytesAfterCandidates = m_payload.archivejobstotalsize();
ret.remainingFilesAfterCandidates = m_payload.archivejobs_size();
for (auto & j: m_payload.archivejobs()) {
if (!archiveRequestsToSkip.count(j.address())) {
ret.candidates.push_back({j.size(), j.address(), (uint16_t)j.copynb()});
ret.candidateBytes += j.size();
ret.candidateFiles ++;
}
ret.remainingBytesAfterCandidates -= j.size();
ret.remainingFilesAfterCandidates--;
if (ret.candidateBytes >= maxBytes || ret.candidateFiles >= maxFiles) break;
}
return ret;
}
auto ArchiveQueueShard::removeJobs(const std::list<std::string>& jobsToRemove) -> RemovalResult {
checkPayloadWritable();
RemovalResult ret;
uint64_t totalSize = m_payload.archivejobstotalsize();
auto * jl=m_payload.mutable_archivejobs();
for (auto &rrt: jobsToRemove) {
bool found = false;
do {
found = false;
// Push the found entry all the way to the end.
for (size_t i=0; i<(size_t)jl->size(); i++) {
if (jl->Get(i).address() == rrt) {
found = true;
const auto & j = jl->Get(i);
ret.removedJobs.emplace_back(JobInfo());
ret.removedJobs.back().address = j.address();
ret.removedJobs.back().copyNb = j.copynb();
ret.removedJobs.back().maxDrivesAllowed = j.maxdrivesallowed();
ret.removedJobs.back().minArchiveRequestAge = j.minarchiverequestage();
ret.removedJobs.back().priority = j.priority();
ret.removedJobs.back().size = j.size();
ret.removedJobs.back().startTime = j.starttime();
ret.bytesRemoved += j.size();
totalSize -= j.size();
ret.jobsRemoved++;
m_payload.set_archivejobstotalsize(m_payload.archivejobstotalsize() - j.size());
while (i+1 < (size_t)jl->size()) {
jl->SwapElements(i, i+1);
i++;
}
break;
}
}
// ... and remove it, now sitting at the end of the list.
if (found)
jl->RemoveLast();
} while (found);
}
ret.bytesAfter = totalSize;
ret.jobsAfter = m_payload.archivejobs_size();
return ret;
}
void ArchiveQueueShard::initialize(const std::string& owner) {
ObjectOps<serializers::ArchiveQueueShard, serializers::ArchiveQueueShard_t>::initialize();
setOwner(owner);
setBackupOwner(owner);
m_payload.set_archivejobstotalsize(0);
m_payloadInterpreted=true;
}
auto ArchiveQueueShard::dumpJobs() -> std::list<JobInfo> {
checkPayloadReadable();
std::list<JobInfo> ret;
for (auto &j: m_payload.archivejobs()) {
ret.emplace_back(JobInfo{j.size(), j.address(), (uint16_t)j.copynb(), j.priority(),
j.minarchiverequestage(), j.maxdrivesallowed(), (time_t)j.starttime()});
}
return ret;
}
auto ArchiveQueueShard::getJobsSummary() -> JobsSummary {
checkPayloadReadable();
JobsSummary ret;
// Fill in both members (jobs was previously left uninitialized).
ret.jobs = m_payload.archivejobs_size();
ret.bytes = m_payload.archivejobstotalsize();
return ret;
}
uint64_t ArchiveQueueShard::addJob(ArchiveQueue::JobToAdd& jobToAdd) {
checkPayloadWritable();
auto * j = m_payload.mutable_archivejobs()->Add();
j->set_address(jobToAdd.archiveRequestAddress);
j->set_size(jobToAdd.fileSize);
j->set_fileid(jobToAdd.archiveFileId);
j->set_copynb(jobToAdd.job.copyNb);
j->set_maxdrivesallowed(jobToAdd.policy.maxDrivesAllowed);
j->set_priority(jobToAdd.policy.archivePriority);
j->set_minarchiverequestage(jobToAdd.policy.archiveMinRequestAge);
j->set_starttime(jobToAdd.startTime);
m_payload.set_archivejobstotalsize(m_payload.archivejobstotalsize()+jobToAdd.fileSize);
return m_payload.archivejobs_size();
}
}}
\ No newline at end of file
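The removal loop in removeJobs() above follows a protobuf idiom: a RepeatedPtrField only supports cheap removal at the end, so each matching entry is bubbled to the back with SwapElements() and dropped with RemoveLast(). A minimal standalone sketch of the same idiom on a std::vector (hypothetical names, illustration only):

#include <algorithm>
#include <string>
#include <vector>

// Remove every element equal to `target` by swapping each match to the last
// position and popping it, mirroring SwapElements()/RemoveLast() on a
// protobuf repeated field where only tail removal is cheap.
void swapToEndAndRemove(std::vector<std::string> & v, const std::string & target) {
  bool found;
  do {
    found = false;
    for (size_t i = 0; i < v.size(); i++) {
      if (v[i] == target) {
        found = true;
        // Bubble the match all the way to the last position.
        while (i + 1 < v.size()) {
          std::swap(v[i], v[i + 1]);
          i++;
        }
        break;
      }
    }
    if (found) v.pop_back(); // The std::vector equivalent of RemoveLast().
  } while (found);
}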
/*
* The CERN Tape Archive (CTA) project
* Copyright (C) 2015 CERN
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include "ArchiveQueue.hpp"
namespace cta { namespace objectstore {
class ArchiveQueueShard: public ObjectOps<serializers::ArchiveQueueShard, serializers::ArchiveQueueShard_t> {
public:
// Constructor with undefined address
ArchiveQueueShard(Backend & os);
// Constructor
ArchiveQueueShard(const std::string & address, Backend & os);
// Forbid/hide base initializer
void initialize() = delete;
// Initializer
void initialize(const std::string & owner);
void garbageCollect(const std::string& presumedOwner, AgentReference& agentReference, log::LogContext& lc, cta::catalogue::Catalogue& catalogue) override;
struct JobInfo {
uint64_t size;
std::string address;
uint16_t copyNb;
uint64_t priority;
uint64_t minArchiveRequestAge;
uint64_t maxDrivesAllowed;
time_t startTime;
};
std::list<JobInfo> dumpJobs();
struct JobsSummary {
uint64_t jobs;
uint64_t bytes;
};
JobsSummary getJobsSummary();
/**
 * Adds a job to the shard and returns the new number of jobs in the shard.
 */
uint64_t addJob(ArchiveQueue::JobToAdd & jobToAdd);
struct RemovalResult {
uint64_t jobsRemoved = 0;
uint64_t jobsAfter = 0;
uint64_t bytesRemoved = 0;
uint64_t bytesAfter = 0;
std::list<JobInfo> removedJobs;
};
/**
 * Removes the listed jobs from the shard. Returns a summary including the
 * list of jobs actually removed.
 */
RemovalResult removeJobs(const std::list<std::string> & jobsToRemove);
ArchiveQueue::CandidateJobList getCandidateJobList(uint64_t maxBytes, uint64_t maxFiles, std::set<std::string> archiveRequestsToSkip);
/** Recompute the summaries in case they do not match the array content. */
void rebuild();
};
}} // namespace cta::objectstore
\ No newline at end of file
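The top-level ArchiveQueue (whose own diff is collapsed above) is expected to spread jobs over shards of at most c_maxShardSize entries. A rough sketch of the enqueueing decision, under the assumption that the queue keeps a list of shard pointers with cached job counts (all names hypothetical):

#include <cstdint>
#include <functional>
#include <string>
#include <vector>

// Hypothetical sketch: reuse the last shard while it has room, otherwise mint
// a new shard name. In the real code the name generation would go through the
// AgentReference so that shard addresses cannot collide.
struct ShardPointer { std::string address; uint64_t jobCount; };

std::string pickOrCreateShard(std::vector<ShardPointer> & shards,
    uint64_t maxShardSize, const std::function<std::string()> & nextShardName) {
  if (!shards.empty() && shards.back().jobCount < maxShardSize)
    return shards.back().address;
  shards.push_back({nextShardName(), 0});
  return shards.back().address;
}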
......@@ -318,7 +318,7 @@ void ArchiveRequest::garbageCollect(const std::string &presumedOwner, AgentRefer
std::list<ArchiveQueue::JobToAdd> jta;
jta.push_back({jd, getAddressIfSet(), getArchiveFile().archiveFileID,
getArchiveFile().fileSize, getMountPolicy(), getEntryLog().time});
aq.addJobsIfNecessaryAndCommit(jta);
aq.addJobsIfNecessaryAndCommit(jta, agentReference, lc);
auto queueUpdateTime = t.secs(utils::Timer::resetCounter);
j->set_owner(aq.getAddressIfSet());
j->set_status(serializers::AJS_PendingMount);
......
......@@ -51,6 +51,7 @@ SET_SOURCE_FILES_PROPERTIES(${CTAProtoDependants}
include_directories (${PROTOBUF3_INCLUDE_DIRS})
add_library (ctaobjectstore SHARED
${CTAProtoSources}
ObjectOps.cpp
RootEntry.cpp
Agent.cpp
AgentHeartbeatThread.cpp
......@@ -58,6 +59,7 @@ add_library (ctaobjectstore SHARED
AgentRegister.cpp
AgentWatchdog.cpp
ArchiveQueue.cpp
ArchiveQueueShard.cpp
RetrieveQueue.cpp
ArchiveRequest.cpp
RetrieveRequest.cpp
......
......@@ -380,7 +380,7 @@ void GarbageCollector::cleanupDeadAgent(const std::string & address, log::LogCon
Helpers::getLockedAndFetchedQueue<ArchiveQueue>(aq, aql, m_ourAgentReference, tapepool.first, lc);
queueLockFetchTime = t.secs(utils::Timer::resetCounter);
auto jobsSummary=aq.getJobsSummary();
filesBefore=jobsSummary.files;
filesBefore=jobsSummary.jobs;
bytesBefore=jobsSummary.bytes;
// We have the queue. We will loop on the requests, add them to the queue. We will launch their updates
// after committing the queue.
......@@ -394,7 +394,7 @@ void GarbageCollector::cleanupDeadAgent(const std::string & address, log::LogCon
}
}
}
auto addedJobs = aq.addJobsIfNecessaryAndCommit(jtal);
auto addedJobs = aq.addJobsIfNecessaryAndCommit(jtal, m_ourAgentReference, lc);
queueProcessAndCommitTime = t.secs(utils::Timer::resetCounter);
// If we have an unexpected failure, we will re-run the individual garbage collection. Before that,
// we will NOT remove the object from agent's ownership. This variable is declared a bit ahead so
......@@ -492,7 +492,7 @@ void GarbageCollector::cleanupDeadAgent(const std::string & address, log::LogCon
.add("bytesDequeuedAfterErrors", bytesDequeued)
.add("filesBefore", filesBefore)
.add("bytesBefore", bytesBefore)
.add("filesAfter", jobsSummary.files)
.add("filesAfter", jobsSummary.jobs)
.add("bytesAfter", jobsSummary.bytes)
.add("queueLockFetchTime", queueLockFetchTime)
.add("queueProcessAndCommitTime", queueProcessAndCommitTime)
......
......@@ -387,7 +387,7 @@ TEST(ObjectStore, GarbageCollectorArchiveRequest) {
policy.maxDrivesAllowed = 1;
std::list <cta::objectstore::ArchiveQueue::JobToAdd> jta;
jta.push_back({jd, ar.getAddressIfSet(), ar.getArchiveFile().archiveFileID, 1000U+pass, policy, time(NULL)});
aq.addJobsAndCommit(jta);
aq.addJobsAndCommit(jta, agentRef, lc);
}
if (pass < 4) { pass++; continue; }
// TODO: partially migrated or selected
......@@ -406,7 +406,7 @@ TEST(ObjectStore, GarbageCollectorArchiveRequest) {
policy.maxDrivesAllowed = 1;
std::list <cta::objectstore::ArchiveQueue::JobToAdd> jta;
jta.push_back({jd, ar.getAddressIfSet(), ar.getArchiveFile().archiveFileID, 1000+pass, policy, time(NULL)});
aq.addJobsAndCommit(jta);
aq.addJobsAndCommit(jta, agentRef, lc);
}
if (pass < 5) { pass++; continue; }
// - Still marked a not owned but referenced in the agent
......@@ -439,8 +439,8 @@ TEST(ObjectStore, GarbageCollectorArchiveRequest) {
auto d1=aq1.dumpJobs();
// We expect all jobs with sizes 1002-1005 inclusive to be connected to
// their respective tape pools.
ASSERT_EQ(5, aq0.getJobsSummary().files);
ASSERT_EQ(5, aq1.getJobsSummary().files);
ASSERT_EQ(5, aq0.getJobsSummary().jobs);
ASSERT_EQ(5, aq1.getJobsSummary().jobs);
}
// Unregister gc's agent
cta::objectstore::ScopedExclusiveLock gcal(gcAgent);
......
......@@ -148,18 +148,17 @@ namespace {
using cta::objectstore::GenericObject;
using cta::objectstore::ScopedExclusiveLock;
template <class C>
std::string dumpWithType(GenericObject * gop, ScopedSharedLock& lock) {
std::string dumpWithType(GenericObject * gop) {
C typedObject(*gop);
lock.transfer(typedObject);
ScopedLock::transfer(*gop, typedObject);
std::string ret = typedObject.dump();
// Release the lock now: if we left it to the caller, it would point
// to the by-then destroyed typedObject.
lock.release();
return ret;
}
}
std::string GenericObject::dump(ScopedSharedLock& lock) {
std::string GenericObject::dump() {
checkHeaderReadable();
google::protobuf::util::JsonPrintOptions options;
options.add_whitespace = true;
......@@ -169,31 +168,31 @@ std::string GenericObject::dump(ScopedSharedLock& lock) {
google::protobuf::util::MessageToJsonString(m_header, &headerDump, options);
switch(m_header.type()) {
case serializers::RootEntry_t:
bodyDump = dumpWithType<RootEntry>(this, lock);
bodyDump = dumpWithType<RootEntry>(this);
break;
case serializers::AgentRegister_t:
bodyDump = dumpWithType<AgentRegister>(this, lock);
bodyDump = dumpWithType<AgentRegister>(this);
break;
case serializers::Agent_t:
bodyDump = dumpWithType<Agent>(this, lock);
bodyDump = dumpWithType<Agent>(this);
break;
case serializers::DriveRegister_t:
bodyDump = dumpWithType<DriveRegister>(this, lock);
bodyDump = dumpWithType<DriveRegister>(this);
break;
case serializers::ArchiveQueue_t:
bodyDump = dumpWithType<cta::objectstore::ArchiveQueue>(this, lock);
bodyDump = dumpWithType<cta::objectstore::ArchiveQueue>(this);
break;
case serializers::RetrieveQueue_t:
bodyDump = dumpWithType<cta::objectstore::RetrieveQueue>(this, lock);
bodyDump = dumpWithType<cta::objectstore::RetrieveQueue>(this);
break;
case serializers::ArchiveRequest_t:
bodyDump = dumpWithType<ArchiveRequest>(this, lock);
bodyDump = dumpWithType<ArchiveRequest>(this);
break;
case serializers::RetrieveRequest_t:
bodyDump = dumpWithType<RetrieveRequest>(this, lock);
bodyDump = dumpWithType<RetrieveRequest>(this);
break;
case serializers::SchedulerGlobalLock_t:
bodyDump = dumpWithType<SchedulerGlobalLock>(this, lock);
bodyDump = dumpWithType<SchedulerGlobalLock>(this);
break;
default:
std::stringstream err;
......
......@@ -78,7 +78,7 @@ public:
*
* No lock parameter is needed anymore: the dump is now lock-free.
*/
std::string dump(ScopedSharedLock & lock);
std::string dump();
CTA_GENERATE_EXCEPTION_CLASS(UnsupportedType);
......
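With dump() now taking no lock, the cta-objectstore-dump-object tool can read an object without holding a backend lock. A sketch of the expected call pattern, assuming a no-lock fetch helper on ObjectOps that sets the m_noLock flag seen in the changes below:

#include <iostream>

// Sketch: fetch the object without taking a lock (the no-lock fetch path sets
// m_noLock, so the readability checks pass), then dump it.
// objectAddress and backend are assumed to come from the tool's command line.
cta::objectstore::GenericObject go(objectAddress, backend);
go.fetchNoLock();
std::cout << go.dump() << std::endl;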
/*
* The CERN Tape Archive (CTA) project
* Copyright (C) 2015 CERN
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "ObjectOps.hpp"
namespace cta { namespace objectstore {
ObjectOpsBase::~ObjectOpsBase() {
if (m_lockForSubObject) m_lockForSubObject->dereferenceSubObject(*this);
}
}}
\ No newline at end of file
......@@ -30,6 +30,8 @@
namespace cta { namespace objectstore {
class AgentReference;
class ScopedLock;
class ScopedExclusiveLock;
class ObjectOpsBase {
friend class ScopedLock;
......@@ -42,6 +44,8 @@ protected:
m_headerInterpreted(false), m_payloadInterpreted(false),
m_existingObject(false), m_locksCount(0),
m_locksForWriteCount(0) {}
virtual ~ObjectOpsBase();
public:
CTA_GENERATE_EXCEPTION_CLASS(AddressNotSet);
CTA_GENERATE_EXCEPTION_CLASS(NotLocked);
......@@ -82,6 +86,8 @@ protected:
void checkWritable() {
if (m_existingObject && !m_locksForWriteCount)
throw NotLocked("In ObjectOps::checkWritable: object not locked for write");
if (m_existingObject && !m_exclusiveLock && !m_lockForSubObject)
throw exception::Exception("In ObjectOps::checkWritable: missing reference to exclusive lock");
}
void checkReadable() {
......@@ -163,6 +169,12 @@ protected:
int m_locksCount;
int m_locksForWriteCount;
bool m_noLock = false;
// When locked exclusively, we keep a reference to the lock so we can
// propagate it to sub objects with minimal parameter passing.
ScopedExclusiveLock * m_exclusiveLock = nullptr;
// When locked as a sub object, we keep a reference to the lock we were
// provided with. Conversely, the lock will update us when it is released.
ScopedLock * m_lockForSubObject = nullptr;
};
class ScopedLock {
......@@ -176,6 +188,39 @@ public:
return m_locked;
}
/**
* Virtual function (implemented differently in shared and exclusive locks),
* marking the object as locked.
* @param objectOps pointer to the ObjectOpsBase.
*/
virtual void setObjectLocked(ObjectOpsBase * objectOps) = 0;
/**
* Virtual function (implemented differently in shared and exclusive locks),
* marking the object as unlocked.
* @param objectOps pointer to the ObjectOpsBase.
*/
virtual void setObjectUnlocked(ObjectOpsBase * objectOps) = 0;
/**
 * Expand the scope of the current lock to a sub object, which will then be
 * covered by the same lock. This gives the sub object the same protection
 * against access without proper locking. This feature is intended for use
 * with sharded objects.
 */
void includeSubObject(ObjectOpsBase & subObject) {
// To propagate a lock, we should have one to begin with.
checkLocked();
ObjectOpsBase * oob = & subObject;
// Validate the sub object is defined.
checkObjectAndAddressSet(oob);
// Propagate the lock to the sub object (this is lock-type dependent).
setObjectLocked(oob);
// Reference ourselves to the sub object so it can report its destruction to us.
oob->m_lockForSubObject = this;
// Add a reference to the object.
m_subObjectsOps.push_back(oob);
}
/** Move the locked object reference to a new one. This is done when the locked
* object is a GenericObject and the caller instantiated a derived object from
* it. The lock follows the move.
......@@ -183,20 +228,38 @@ public:
* use case).
* New object's locks are moved from the old one (referenced in the lock)
*/
void transfer(ObjectOpsBase & newObject) {
decltype(m_objectOps) oldObj(m_objectOps);
m_objectOps = & newObject;
static void transfer(ObjectOpsBase & oldObject, ObjectOpsBase & newObject) {
// Transfer the locks from old to new object
m_objectOps->m_locksCount = oldObj->m_locksCount;
m_objectOps->m_locksForWriteCount = oldObj->m_locksForWriteCount;
newObject.m_locksCount = oldObject.m_locksCount;
newObject.m_locksForWriteCount = oldObject.m_locksForWriteCount;
newObject.m_exclusiveLock = oldObject.m_exclusiveLock;
newObject.m_lockForSubObject = oldObject.m_lockForSubObject;
newObject.m_noLock = oldObject.m_noLock;
// The old object is not considered locked anymore and should be
// discarded. A previous call to the new object's constructor should have
// taken over the object's content (GenericObject use case).
oldObj->m_locksCount = 0;
oldObj->m_locksForWriteCount = 0;
oldObject.m_locksCount = 0;
oldObject.m_locksForWriteCount = 0;
oldObject.m_exclusiveLock = nullptr;
oldObject.m_lockForSubObject = nullptr;
oldObject.m_noLock=false;
}
/**
* Dereference a sub object at destruction time
* @param subObject
*/
void dereferenceSubObject(ObjectOpsBase & subObject) {
m_subObjectsOps.remove(&subObject);
}
virtual ~ScopedLock() {
releaseIfNeeded();
// Each child class will have to call releaseIfNeeded() in their own destructor
// as it relies on pure virtual members of this base class.
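Putting the pieces together, the intended pattern for writing to a shard under the queue's lock looks roughly like this (a sketch; queueAddress, shardAddress and backend are assumed to come from the surrounding code, with the shard address read from the queue's payload):

// The queue is locked exclusively, then the lock's scope is widened to cover
// a shard via includeSubObject(), so the shard passes checkWritable() without
// a second backend lock: queue and shard share the same protection.
cta::objectstore::ArchiveQueue aq(queueAddress, backend);
cta::objectstore::ScopedExclusiveLock lock(aq);
aq.fetch();
cta::objectstore::ArchiveQueueShard shard(shardAddress, backend);
lock.includeSubObject(shard);
shard.fetch();
// ... modify the shard, e.g. shard.addJob(jobToAdd) ...
shard.commit();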