Commit b4bbb445 authored by Giuseppe Lo Presti's avatar Giuseppe Lo Presti
Browse files

Fixed CASTOR-4629: Race condition found: file garbage collected before recall loop finishes.

This is actually a partial backport of commit c948dd43.
parent 17cc84a3
......@@ -331,6 +331,13 @@
# stager catalog. The synchronization with the nameserver is not affected.
#GC DisableStagerSync no
# The period during which new files won't be considered for synchronization.
# This protects in particular files being created (e.g. ongoing recalls) by giving
# time to the stager DB to create the associated DiskCopy. Otherwise, we would
# have a time window where the file exists on disk and can be considered by
# synchronization while it does not yet exist on the stager, so it may be dropped.
# Default is one day (86400 seconds).
#GC SyncGracePeriod 86400
## Security Configuration ######################################################
......
......@@ -150,6 +150,8 @@ castor::gc::GcDaemon::GcDaemon(): castor::server::BaseDaemon("gcd") {
{ 40, "Unexpected exception caught in synchronizeFiles" },
{ 41, "Failed to stat file" },
{ 43, "Could not get fileid from filepath, giving up for this file" },
{ 46, "New synchronization grace period" },
{ 47, "Invalid GC/SyncGracePeriod option, using default" },
{ -1, "" }};
dlfInit(messages);
}
......@@ -50,7 +50,7 @@
#define DEFAULT_CHUNKINTERVAL 1800
#define DEFAULT_CHUNKSIZE 2000
#define DEFAULT_DISABLESTAGERSYNC false
#define DEFAULT_GRACEPERIOD 86400
//-----------------------------------------------------------------------------
// Constructor
......@@ -71,14 +71,15 @@ void castor::gc::SynchronizationThread::run(void*) {
unsigned int chunkInterval = DEFAULT_CHUNKINTERVAL;
unsigned int chunkSize = DEFAULT_CHUNKSIZE;
bool disableStagerSync = DEFAULT_DISABLESTAGERSYNC;
readConfigFile(&chunkInterval, &chunkSize, &disableStagerSync, true);
unsigned int gracePeriod = DEFAULT_GRACEPERIOD;
readConfigFile(&chunkInterval, &chunkSize, &disableStagerSync, &gracePeriod, true);
// Endless loop
for (;;) {
// Get the synchronization interval and chunk size; these may have changed
// since the last iteration.
readConfigFile(&chunkInterval, &chunkSize, &disableStagerSync);
readConfigFile(&chunkInterval, &chunkSize, &disableStagerSync, &gracePeriod);
if (chunkInterval <= 0) {
// just do nothing if interval = 0
sleep(300);
......@@ -158,7 +159,7 @@ void castor::gc::SynchronizationThread::run(void*) {
struct dirent *file;
while ((file = readdir(files))) {
// Ignore non regular files and files closed too recently (< 1mn)
// Ignore non regular files and files closed too recently.
// This protects in particular recently recalled files by giving time
// to the stager DB to create the associated DiskCopy. Otherwise,
// we would have a time window where the file exists on disk and can
......@@ -170,7 +171,7 @@ void castor::gc::SynchronizationThread::run(void*) {
continue;
} else if (!(filebuf.st_mode & S_IFREG)) {
continue; // not a file
} else if (filebuf.st_mtime > time(NULL) - 600) {
} else if (filebuf.st_mtime > time(NULL) - gracePeriod) {
continue;
}
......@@ -258,12 +259,13 @@ void castor::gc::SynchronizationThread::run(void*) {
//-----------------------------------------------------------------------------
// ReadConfigFile
// readConfigFile
//-----------------------------------------------------------------------------
void castor::gc::SynchronizationThread::readConfigFile
(unsigned int *chunkInterval,
unsigned int *chunkSize,
bool *disableStagerSync,
unsigned int *gracePeriod,
bool firstTime)
throw(castor::exception::Exception) {
......@@ -332,13 +334,38 @@ void castor::gc::SynchronizationThread::readConfigFile
}
}
// Grace period size
int gracePeriodnew;
if ((value = getenv("GC_SYNCGRACEPERIOD")) ||
(value = getconfent("GC", "SyncGracePeriod", 0))) {
gracePeriodnew = atoi(value);
if (gracePeriodnew >= 0) {
if (*gracePeriod != (time_t)gracePeriodnew) {
*gracePeriod = (time_t)gracePeriodnew;
if (!firstTime) {
// "New synchronization grace period"
castor::dlf::Param params[] =
{castor::dlf::Param("GracePeriod", *gracePeriod)};
castor::dlf::dlf_writep(nullCuuid, DLF_LVL_SYSTEM, 46, 1, params);
}
}
} else {
*gracePeriod = DEFAULT_GRACEPERIOD;
// "Invalid GC/SyncGracePeriod option, using default"
castor::dlf::Param params[] =
{castor::dlf::Param("Default", *gracePeriod)};
castor::dlf::dlf_writep(nullCuuid, DLF_LVL_ERROR, 47, 1, params);
}
}
// Logging at start time
if (firstTime) {
// "Synchronization configuration"
castor::dlf::Param params[] =
{castor::dlf::Param("ChunkInterval", *chunkInterval),
castor::dlf::Param("ChunkSize", *chunkSize)};
castor::dlf::dlf_writep(nullCuuid, DLF_LVL_SYSTEM, 30, 2, params);
castor::dlf::Param("ChunkSize", *chunkSize),
castor::dlf::Param("GracePeriod", *gracePeriod)};
castor::dlf::dlf_writep(nullCuuid, DLF_LVL_SYSTEM, 30, 3, params);
}
}
......
......@@ -80,14 +80,17 @@ namespace castor {
* @param chunkSize a pointer to the chunk size value
* @param disableStagerSync a pointer to the boolean commanding disabling
* of the synchronization with the stager
* @param gracePeriod a pointer to the grace period for the synchronization
* of new files
* @param firstTime whether this is the first call. Used only for logging
* purposes
*/
void readConfigFile(unsigned int *chunkInterval,
unsigned int *chunkSize,
bool *disableStagerSync,
unsigned int *gracePeriod,
bool firstTime = false)
throw(castor::exception::Exception);
throw(castor::exception::Exception);
/**
* Parse a fileName and extract the diskCopyId
......@@ -135,7 +138,6 @@ namespace castor {
/// The number of seconds to delay the first invocation of the run method
int m_startDelay;
};
} // End of namespace gc
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment