Commit 73a9c407 authored by Steven Murray's avatar Steven Murray
Browse files

Added first draft version of cta-fst-gc

parent 780ebd75
......@@ -143,6 +143,7 @@ ELSE(DEFINED PackageOnly)
add_subdirectory(eos)
add_subdirectory(mediachanger)
add_subdirectory(objectstore)
add_subdirectory(python)
add_subdirectory(rdbms)
add_subdirectory(scheduler)
add_subdirectory(tapeserver)
......@@ -169,6 +170,7 @@ if (${COMPILE_PACKAGING} STREQUAL "1")
RPMTools_ADD_RPM_TARGETS(
${PROJECT_NAME} ${PROJECT_NAME}.spec.in)
endif (RPMTools_FOUND)
add_dependencies(cta_rpm cta_python_package_rpm)
endif (${COMPILE_PACKAGING} STREQUAL "1")
#add_custom_target(test test/castorUnitTests
......
# The CERN Tape Archive(CTA) project
# Copyright(C) 2015 CERN
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# Find module that locates the python interpreter.
#
# This module will set the following variables:
#   PYTHON_FOUND
#   PYTHON_PROGRAM   (result of searching for an executable named "python")
find_program(PYTHON_PROGRAM
  NAMES python)

# Report the outcome in the standard find_package() way: the package is
# considered found only if PYTHON_PROGRAM has been set to a valid path
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(python
  DEFAULT_MSG
  PYTHON_PROGRAM)
......@@ -383,3 +383,26 @@ Common items such as the creation of the cta local user and /var/log/cta
%files -n cta-common
%defattr(-,root,root)
%attr(0755,cta,tape) %dir /var/log/cta
# Sub-package shipping the cta-fst-gcd daemon: the executable, its man page
# and its systemd unit file
%package -n cta-fst-gcd
Summary: Tape aware garbage collector daemon to run on an EOS FST
Group: Application/CTA
Requires: eos-client
Requires: python
# The python-cta package must have exactly the same version and release
Requires: python-cta = %{version}-%{release}
%description -n cta-fst-gcd
cta-fst-gcd is a daemon that runs on an EOS FST and garbage
collects EOS disk copies that have been safely stored to tape.
%files -n cta-fst-gcd
# Everything is owned by root; the unit file is a configuration file that is
# not replaced on upgrade if it has been modified locally
%defattr(-,root,root)
%attr(0755,root,root) /usr/bin/cta-fst-gcd
%attr(0644,root,root) %doc /usr/share/man/man1/cta-fst-gcd.1cta.gz
%attr(0644,root,root) %config(noreplace) /etc/systemd/system/cta-fst-gcd.service
%post -n cta-fst-gcd
# Scriptlet executed after the package has been installed
%systemd_post cta-fst-gcd.service
%systemdDaemonReload
%preun -n cta-fst-gcd
# Scriptlet executed before the package is removed
%systemd_preun cta-fst-gcd.service
%postun -n cta-fst-gcd
# Scriptlet executed after the package has been removed
%systemd_postun cta-fst-gcd.service
%systemdDaemonReload
# The CERN Tape Archive (CTA) project
# Copyright (C) 2015 CERN
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
cmake_minimum_required (VERSION 2.6)

# This directory builds nothing itself, it only descends into its
# sub-directories
add_subdirectory(eosfstgcd)
add_subdirectory(package)
# The CERN Tape Archive (CTA) project
# Copyright (C) 2015 CERN
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
cmake_minimum_required(VERSION 2.6)

# The executable daemon script.
# NOTE(review): "usr/bin" is a relative destination and is therefore resolved
# against CMAKE_INSTALL_PREFIX, unlike the two absolute destinations below -
# confirm this is intended.
install(PROGRAMS ${CMAKE_CURRENT_SOURCE_DIR}/cta-fst-gcd
  DESTINATION usr/bin)

# The man page of the daemon
install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/cta-fst-gcd.1cta
  DESTINATION /usr/share/man/man1)

# The systemd unit file of the daemon
install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/cta-fst-gcd.service
  DESTINATION /etc/systemd/system)
#!/bin/python
# The CERN Tape Archive (CTA) project
# Copyright (C) 2015 CERN
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from cta.fst import gc

# Hand control over to the command-line entry point of the garbage collector
gc.main()
.\" The CERN Tape Archive (CTA) project
.\" Copyright (C) 2015 CERN
.\"
.\" This program is free software: you can redistribute it and/or modify
.\" it under the terms of the GNU General Public License as published by
.\" the Free Software Foundation, either version 3 of the License, or
.\" (at your option) any later version.
.\"
.\" This program is distributed in the hope that it will be useful,
.\" but WITHOUT ANY WARRANTY; without even the implied warranty of
.\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
.\" GNU General Public License for more details.
.\"
.\" You should have received a copy of the GNU General Public License
.\" along with this program. If not, see <http://www.gnu.org/licenses/>.
.TH CTA-FST-GCD 1CTA "August 2016" CTA CTA
.SH NAME
cta-fst-gcd \- Tape aware garbage collector daemon to run on an EOS FST
.SH SYNOPSIS
.BI "cta-fst-gcd [options]"
.SH DESCRIPTION
\fBcta-fst-gcd\fP is a daemon that runs on an EOS FST and garbage
collects EOS disk copies that have been safely stored to tape.
.P
The \fBcta-fst-gcd\fP daemon scans across every single EOS disk file on
an FST. A file is garbage collected if both of the following conditions
are met:
.IP \[bu] 2
The amount of free space on the corresponding file system is considered
too low.
.IP \[bu]
The file is considered old enough to be garbage collected.
.P
The \fBcta-fst-gcd\fP daemon garbage collects an EOS disk file by
extracting the hexadecimal EOS file identifier from the local disk
filename and then running \fBeos stagerrm fxid:<fid-hex>\fP.
.P
.SH OPTIONS
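.TP
\fB\-h, \-\-help\fP
Prints the usage message.
.TP
\fB\-f, \-\-minfreebytes\fP \fIbytes\fP
The minimum amount of free space in bytes. The default is 10000000000.
.TP
\fB\-a, \-\-gcagesecs\fP \fIseconds\fP
The age in seconds that a file must have in order to be considered for
garbage collection. The default is 7200.
.TP
\fB\-m, \-\-mgmhost\fP \fIhost\fP
The EOS MGM host. If this option is not given then the host is taken from
the \fBEOS_MGM_URL\fP environment variable or, failing that, from the
\fBEOS_MGM_HOST\fP entry of /etc/sysconfig/eos_env.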
.SH RETURN VALUE
Zero on success and non-zero on failure.
.SH AUTHOR
\fBCTA\fP Team
# systemd unit file of the cta-fst-gcd daemon
[Unit]
Description=Tape aware garbage collector daemon to run on an EOS FST
[Service]
ExecStart=/usr/bin/cta-fst-gcd
# No limit on the size of core dumps
LimitCORE=infinity
Type=simple
# The daemon is not restarted automatically when it exits
Restart=no
# cta-fst-gcd refuses to run as any user other than daemon
User=daemon
Group=daemon
WorkingDirectory=/tmp
[Install]
WantedBy=default.target
# The CERN Tape Archive (CTA) project
# Copyright (C) 2015 CERN
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
cmake_minimum_required (VERSION 2.6)

# Provides PYTHON_PROGRAM, the interpreter used to run setup.py
find_package (python REQUIRED)

# Generate setup.py in the build tree from its template
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in
  ${CMAKE_CURRENT_BINARY_DIR}/setup.py)

# The python sources packaged into the RPM; a change to any of them must
# trigger a rebuild of the RPMs
set (CTA_PYTHON_PACKAGE_SRC_FILES
  ${CMAKE_CURRENT_SOURCE_DIR}/cta/__init__.py
  ${CMAKE_CURRENT_SOURCE_DIR}/cta/fst/__init__.py
  ${CMAKE_CURRENT_SOURCE_DIR}/cta/fst/gc.py
  ${CMAKE_CURRENT_SOURCE_DIR}/cta/exceptions.py)

# The RPM files declared as the outputs of the packaging command.
# NOTE(review): the command below is given the release
# "${CTA_RELEASE}.el7.cern" whereas these names contain only
# "${CTA_VERSION}.${CTA_RELEASE}" - the declared outputs presumably do not
# match the files written by bdist_rpm, in which case the command is re-run
# on every build. Verify against setup.py.in.
set (CTA_PYTHON_PACKAGE_RPMS
  ${CMAKE_CURRENT_BINARY_DIR}/dist/python-cta-${CTA_VERSION}.${CTA_RELEASE}.noarch.rpm
  ${CMAKE_CURRENT_BINARY_DIR}/dist/python-cta-${CTA_VERSION}.${CTA_RELEASE}.src.rpm)

# Build the RPMs with the bdist_rpm command of setup.py. The generated
# setup.py is listed as a dependency so that a change to setup.py.in, or to
# a value substituted into it, also triggers a rebuild.
add_custom_command(OUTPUT ${CTA_PYTHON_PACKAGE_RPMS}
  COMMAND ${PYTHON_PROGRAM} ${CMAKE_CURRENT_BINARY_DIR}/setup.py bdist_rpm --release ${CTA_RELEASE}.el7.cern
  DEPENDS ${CTA_PYTHON_PACKAGE_SRC_FILES} ${CMAKE_CURRENT_BINARY_DIR}/setup.py
  WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
  VERBATIM)

add_custom_target(cta_python_package_rpm DEPENDS ${CTA_PYTHON_PACKAGE_RPMS})
This diff is collapsed.
The tape aware FST garbage collector implemented under the CERN Tape Archive (CTA) project.
# The CERN Tape Archive (CTA) project
# Copyright (C) 2015 CERN
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# The CERN Tape Archive (CTA) project
# Copyright (C) 2015 CERN
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
class UserError(Exception):
    """Exception raised when a failure is caused by a mistake of the user."""
# The CERN Tape Archive (CTA) project
# Copyright (C) 2015 CERN
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#!/bin/python
# The CERN Tape Archive (CTA) project
# Copyright (C) 2015 CERN
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import argparse
import datetime
import getpass
import logging
import logging.config
import os
import re
import socket
import subprocess
import sys
import time
from cta.exceptions import UserError
class Gc:
    """Tape aware garbage collector of the EOS disk files of the local FST.

    The collector asks the EOS MGM for the list of file systems, keeps those
    whose host is the fully qualified domain name of this machine and walks
    their directories.  A file is handed to "eos stagerrm" when the free space
    of its file system is below minfreebytes and the file is older than
    gcagesecs.
    """

    def __init__(self, programname, env, minfreebytes, gcagesecs, cmdline_mgmhost = None,
                 logfilepath = '/var/log/eos/fst/cta-fst-gcd.log'):
        """Stores the configuration of the garbage collector.

        programname     -- name used in log lines and error messages
        env             -- mapping of environment variables (os.environ or a
                           plain dictionary)
        minfreebytes    -- files are only collected from a file system whose
                           free space in bytes is below this value
        gcagesecs       -- files are only collected if they are older than
                           this number of seconds
        cmdline_mgmhost -- MGM host given on the command line or None
        logfilepath     -- path of the log file or None to disable file based
                           logging
        """
        self.programname = programname
        self.env = env
        self.minfreebytes = minfreebytes
        self.gcagesecs = gcagesecs
        self.cmdline_mgmhost = cmdline_mgmhost
        self.logfilepath = logfilepath
        self.fqdn = socket.getfqdn()
        # The command line takes precedence, see setmgmhost() for the fallbacks
        self.mgmhost = cmdline_mgmhost
        self.localfilesystempaths = []

    def get_env_mgmhost(self):
        """Returns the MGM host taken from the EOS_MGM_URL environment
        variable with any leading root:// or xroot:// removed, or None if the
        variable is not set or is empty."""
        if "EOS_MGM_URL" in self.env:
            mgm_url = self.env["EOS_MGM_URL"]
            if mgm_url:
                return re.sub("^x?root://", "", mgm_url)
        return None

    def get_sysconfig_file_mgmhost(self, sysconfig_file):
        """Returns the value of the first EOS_MGM_HOST=value line of the
        specified iterable of lines, or None if there is no such line.

        A matching line is ignored if it does not contain exactly one '='.
        """
        for line in sysconfig_file:
            mgmhostline = re.match("^EOS_MGM_HOST=.*", line)
            if mgmhostline:
                splitmgmhostline = mgmhostline.group(0).split('=')
                if 2 == len(splitmgmhostline):
                    return splitmgmhostline[1]
        return None

    def get_syconfig_mgmhost(self):
        """Returns the MGM host configured in /etc/sysconfig/eos_env or None
        if the file does not exist or does not configure it."""
        sysconfig_path = "/etc/sysconfig/eos_env"
        if os.path.isfile(sysconfig_path):
            # The with statement guarantees the file is closed again
            with open(sysconfig_path, "r") as sysconfig_file:
                return self.get_sysconfig_file_mgmhost(sysconfig_file)
        return None

    def setmgmhost(self):
        """Sets self.mgmhost from the first source that provides a value: the
        command line, the environment and finally the sysconfig file.

        Raises Exception if none of the sources provides the MGM host.
        """
        if self.mgmhost:
            return

        self.mgmhost = self.get_env_mgmhost()
        if self.mgmhost:
            return

        self.mgmhost = self.get_syconfig_mgmhost()
        if self.mgmhost:
            return

        raise Exception("Failed to determine the MGM host")

    def configureDummyLogging(self):
        """Configures the 'gc' logger without any handler of its own."""
        config = {
            'version': 1,
            'disable_existing_loggers': False,
            'loggers': {
                'gc' : {
                    'level': 'INFO'
                }
            }
        }
        logging.config.dictConfig(config)

    def configureFileBasedLogging(self):
        """Configures the 'gc' logger to write to self.logfilepath using a
        handler that rotates the file at midnight.

        Raises Exception if the log file path has not been set and UserError
        if the logging directory is unusable or the logging system rejects
        the configuration.
        """
        if self.logfilepath is None:
            raise Exception("Cannot configure file based logging because the log file path has not been set")

        loggingdir = os.path.dirname(self.logfilepath)
        if not os.path.isdir(loggingdir):
            raise UserError("The logging directory {} is not a directory or does not exist".format(loggingdir))
        if not os.access(loggingdir, os.W_OK):
            raise UserError("The logging directory {} cannot be written to by {}".format(loggingdir, self.programname))

        config = {
            'version': 1,
            'disable_existing_loggers': False,
            'formatters': {
                'stdout': {
                    'format': '%(asctime)s.%(msecs)03d000 %(levelname)s ' + self.programname +
                              ': LVL="%(levelname)s" PID="%(process)d" TID="%(process)d" MSG="%(message)s"',
                    'datefmt': '%Y/%m/%d %H:%M:%S'
                }
            },
            'handlers': {
                'logfile': {
                    'level': 'INFO',
                    'formatter': 'stdout',
                    'class': 'logging.handlers.TimedRotatingFileHandler',
                    'filename' : self.logfilepath,
                    'when' : 'midnight'
                }
            },
            'loggers': {
                'gc' : {
                    'handlers': ['logfile'],
                    'level': 'INFO'
                }
            }
        }

        # Failing to configure the logging system is usually a user error
        try:
            logging.config.dictConfig(config)
        except Exception as err:
            raise UserError(err)

    def configureLogging(self):
        """Configures file based logging if a log file path has been set and
        dummy logging otherwise."""
        if self.logfilepath is None:
            self.configureDummyLogging()
        else:
            self.configureFileBasedLogging()

    def eosfsls(self, mgmhost):
        """Runs "eos fs ls -m" against the specified MGM host and returns its
        output as a list of dictionaries, one per non-empty output line, built
        from the key=value pairs of that line.

        Raises Exception if the command returns a non-zero return code.
        """
        mgmurl = "root://{}".format(mgmhost)
        cmd = "eos -r 0 0 {} fs ls -m".format(mgmurl)
        env = os.environ.copy()
        env["XrdSecPROTOCOL"] = "sss"
        process = subprocess.Popen(cmd.split(), stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env)
        stdout,stderr = process.communicate()

        if 0 != process.returncode:
            raise Exception(
                "\n"
                "Failed to execute: {}\n"
                "Return code : {}\n"
                "Return code strerror: {}\n"
                "Standard error : {}".format(cmd, process.returncode, os.strerror(process.returncode), stderr))

        result = []
        for line in stdout.splitlines():
            linedict = {}
            for pair in line.split():
                # Tokens that are not of the form key=value are ignored
                splitpair = pair.split('=')
                if 2 == len(splitpair):
                    linedict[splitpair[0]] = splitpair[1]
            if linedict:
                result.append(linedict)
        return result

    def eosstagerrm(self, mgmhost, fxid):
        """Runs "eos stagerrm fxid:<fxid>" against the specified MGM host and
        logs the command if it succeeded.

        NOTE(review): a non-zero return code is silently ignored - presumably
        because the command is expected to fail for files that may not be
        removed yet; confirm.
        """
        logger = logging.getLogger('gc')
        mgmurl = "root://{}".format(mgmhost)
        cmd = "eos {} stagerrm fxid:{}".format(mgmurl, fxid)
        env = os.environ.copy()
        env["XrdSecPROTOCOL"] = "sss"
        process = subprocess.Popen(cmd.split(), stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env)
        stdout,stderr = process.communicate()

        if 0 == process.returncode:
            logger.info("Executed {}".format(cmd))

    def processfile(self, subdir, fstfile):
        """Garbage collects the file named fstfile of the directory subdir if
        the free space of the file system is below self.minfreebytes and the
        file is older than self.gcagesecs.

        The name of the file is used as the hexadecimal EOS file identifier.
        """
        statvfs = os.statvfs(subdir)
        freebytes = statvfs.f_frsize * statvfs.f_bavail

        # Space only needs to be freed when there is too little of it left
        spaceshouldbefreed = freebytes < self.minfreebytes
        if not spaceshouldbefreed:
            return

        fullpath = os.path.join(subdir, fstfile)
        statinfo = os.stat(fullpath)
        agesecs = time.time() - statinfo.st_ctime
        if agesecs > self.gcagesecs:
            self.eosstagerrm(self.mgmhost, fstfile)

    def processfssubdir(self, subdir):
        """Processes every regular file of subdir whose name is made of
        exactly 8 hexadecimal digits."""
        fstfiles = [f for f in os.listdir(subdir)
                    if re.match('^[0-9A-Fa-f]{8}$', f) and os.path.isfile(os.path.join(subdir, f))]
        for fstfile in fstfiles:
            self.processfile(subdir, fstfile)

    def processfs(self, path):
        """Processes every sub-directory of the file system mounted at path
        whose name is made of exactly 8 hexadecimal digits."""
        fssubdirs = [os.path.join(path, f) for f in os.listdir(path)
                     if re.match('^[0-9A-Fa-f]{8}$', f) and os.path.isdir(os.path.join(path, f))]
        for fssubdir in fssubdirs:
            self.processfssubdir(fssubdir)

    def logfilesystempaths(self):
        """Logs the number and the paths of the local file systems."""
        logger = logging.getLogger('gc')
        logger.info('Number of local file systems is {}'.format(len(self.localfilesystempaths)))
        for i, path in enumerate(self.localfilesystempaths):
            logger.info('Local file system {}: {}'.format(i, path))

    def processallfs(self):
        """Refreshes the list of local file systems from the MGM, logs the
        list if it has changed and processes each of the file systems."""
        filesystems = self.eosfsls(self.mgmhost)
        newlocalfilesystempaths = [fs["path"] for fs in filesystems
                                   if "path" in fs and "host" in fs and self.fqdn == fs["host"]]
        if newlocalfilesystempaths != self.localfilesystempaths:
            self.localfilesystempaths = newlocalfilesystempaths
            self.logfilesystempaths()
        for path in self.localfilesystempaths:
            self.processfs(path)

    def logconf(self):
        """Logs the configuration of the garbage collector."""
        logger = logging.getLogger('gc')
        logger.info("config minfreebytes={}".format(self.minfreebytes))
        logger.info("config gcagesecs={}".format(self.gcagesecs))
        logger.info("config mgmhost={}".format(self.mgmhost))
        logger.info("config logfilepath={}".format(self.logfilepath))

    def run(self):
        """Runs the garbage collector forever, starting a new pass over the
        local file systems at most once every 300 seconds.

        Raises UserError if the current user is not daemon.
        """
        username = getpass.getuser()
        if 'daemon' != username:
            raise UserError('{} must be executed as user daemon and not user {}'.format(self.programname, username))

        self.setmgmhost()
        self.configureLogging()

        logger = logging.getLogger('gc')
        logger.info('{} started'.format(self.programname))
        logger.info('The fqdn of this machine is {}'.format(self.fqdn))
        self.logconf()

        minperiod = 300 # In seconds
        while True:
            before = time.time()
            self.processallfs()
            after = time.time()

            # Sleep for whatever is left of the minimum period
            period = after - before
            if period < minperiod:
                sleeptime = minperiod - period
                logger.debug('Sleeping {} seconds'.format(sleeptime))
                time.sleep(sleeptime)
def main():
    """Command-line entry point of cta-fst-gcd.

    Parses the command line, creates the garbage collector and runs it.  A
    user error is printed and turned into an exit status of 1.
    """
    programname = 'cta-fst-gcd'

    parser = argparse.ArgumentParser()
    parser.add_argument("-f", "--minfreebytes", help="The minimum amount of free space in bytes", type=int, default=10*1000*1000*1000)
    parser.add_argument("-a", "--gcagesecs", help="The age in seconds that a file must have in order to be considered for garbage collection", type=int, default=2*60*60)
    parser.add_argument("-m", "--mgmhost", help="The EOS MGM host")
    args = parser.parse_args()

    # The arguments are positional: the age comes before the MGM host
    gc = Gc(programname, os.environ, args.minfreebytes, args.gcagesecs, args.mgmhost)

    try:
        gc.run()
    except UserError as err:
        # The parenthesised form is valid in both Python 2 and Python 3
        print("User error: {}".format(err))
        # Report the failure to the caller with a non-zero exit status
        sys.exit(1)

if __name__ == '__main__':
    main()
#!/bin/python
# The CERN Tape Archive (CTA) project
# Copyright (C) 2015 CERN
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import cStringIO
import unittest
from cta.fst.gc import Gc
class GcTestCase(unittest.TestCase):
def setUp(self):
self.programname = "test"
self.minfreebytes = 1
self.gcagesecs = 1
self.cmdline_mgmhost = None
self.logfilepath = None
def tearDown(self):
pass
def test_get_env_mgmhost_hostname(self):
hostname = "hostname";
env = {"EOS_MGM_URL" : hostname}
gc = Gc(self.programname, env, self.minfreebytes, self.gcagesecs, self.cmdline_mgmhost, self.logfilepath)
self.assertEqual(hostname, gc.get_env_mgmhost())
def test_get_env_mgmhost_root_hostname(self):
hostname = "hostname";
env = {"EOS_MGM_URL" : "root://" + hostname}
gc = Gc(self.programname, env, self.minfreebytes, self.gcagesecs, self.cmdline_mgmhost, self.logfilepath)
self.assertEqual(hostname, gc.get_env_mgmhost())