Commit ba56c266 authored by Giuseppe Lo Presti's avatar Giuseppe Lo Presti
Browse files

Fixed bug #103521: RFE: rename the TransferManager/*PendingTimeout

options to DiskManager/*PendingTimeout to improve usability.
parent 9ad25c0f
......@@ -210,21 +210,6 @@
# The value is given in seconds. Default is 5
#TransferManager AdminTimeout 5
# The following option defines how long in seconds a job is allowed to remain in a
# queue waiting for resources before being killed by the transfer manager. For
# convenience, a svcclass name of "all" can be used to define a default value for
# all service classes.
#
# The format of the value is:
# <svcclass1>:<timeout1> [svcclass2:timeout2[...]]
#TransferManager PendingTimeouts all:120 default:120
# Defines how long in seconds that a disk2disk copy job can be in a pending state
# before being terminated. This value is global for all service classes.
#TransferManager DiskCopyPendingTimeout 7200
# The SynchronizationInterval option defines how often the transfer managers check
# that jobs pending for more than 1h in the DB are still handled by the scheduling
# system. This allows cleaning up inconsistencies created by double or severe failures,
......@@ -233,14 +218,12 @@
# It also checks for sources of disk to disk copy that may have been left behind due
# to timeouts in internal messages between destination and source machines
# The default value is 300, that is 5mn
#TransferManager SynchronizationInterval 300
# Number of requests read from a given connection in one go before looking at other
# connections in a given processing thread of the TransferManager. A big value will
# improve the overall performance by limiting the number of switches (and the inherent
# queue locks) but will reduce the fairness between connections. Default is set to 10
#TransferManager RequestBatchSize 10
# Interval between two updates of the stager database with heartbeats received from
......@@ -248,7 +231,6 @@
# nodes are disabled if their heartbeat is too old (see DiskServer/HeartBeatTimeout
# config parameter in the stager database)
# Default is 1.0
#TransferManager HeartBeatDBUpdateInterval 1.0
# maximum number of slots on this node. Note that these slots are virtual and that
......@@ -270,6 +252,19 @@
#DiskManager recallWeight 1
#DiskManager migrWeight 1
# The following option defines how long in seconds a job is allowed to remain in a
# queue waiting for resources before being killed by the transfer manager. For
# convenience, a svcclass name of "all" can be used to define a default value for
# all service classes.
#
# The format of the value is:
# <svcclass1>:<timeout1> [svcclass2:timeout2[...]]
#DiskManager PendingTimeouts all:120 default:120
# Defines how long in seconds that a disk2disk copy job can be in a pending state
# before being terminated. This value is global for all service classes.
#DiskManager DiskCopyPendingTimeout 7200
# Number of free slots for which the absence of scheduling if something is in the queue
# should be considered abnormal. See ActivityControlChecker thread in the diskmanager
# daemon for more details. This should usually be put to the maximum Weight given in the
......@@ -280,14 +275,12 @@
# of the disk to disk copy when source is not ready
#DiskManager MaxRetryInterval 300
# Interval between two heartbeats sent to the transfer manager, expressed in seconds.
# Default is 1.0
# Interval between two heartbeats sent to the transfer manager, expressed in seconds
#DiskManager HeartbeatInterval 1.0
# Interval between two logs of heartbeat not sent errors, expressed in seconds.
# Others are logged only in debug level in order to not flood the logs when
# the transfermanagers are all down.
# Default is 300.0
#DiskManager HeartbeatNotSentLogInterval 300.0
# Number of user-requested jobs to be scheduled before a backfill job (e.g. internally
......@@ -308,31 +301,26 @@
# The interval between two checks of the GC daemon to see whether there are
# files to be removed from a diskserver. This value is represented in seconds.
#GC Interval 300
# The ChunkInterval is the interval in seconds between synchronization queries to
# the stager catalog and nameserver, i.e. the interval between two bulk checks of
# size ChunkSize. To disable all synchronization checks set this value to 0.
#GC ChunkInterval 1800
# The ChunkSize defines the number of files that the GC daemon should synchronize
# with the stager catalog and nameserver in one go. Note: the largest value is 3000.
#GC ChunkSize 2000
# By default the startup of the GC daemon is deliberately offset by a random interval
# between 1 and 15 minutes. This randomized delay should prevent all GCs in a
# castor2 instance from deleting files at the same time causing an oscillation in
# incoming network traffic due to deletions. By uncommenting this line, you force the
# incoming network traffic due to deletions. By setting this value to yes, you force the
# garbage collection to ignore this starting delay.
#GC ImmediateStart yes
#GC ImmediateStart no
# This option allows disabling the synchronization between the diskservers and the
# stager catalog. The synchronization with the nameserver is not affected.
#GC DisableStagerSync no
......
......@@ -70,7 +70,7 @@ dlf.addmessages({msgs.INVOKINGSCHEDULETRANSFER : 'Invoking scheduleTransfer',
msgs.INFORMTRANSFERISOVERFAILED : 'Failed to inform scheduler that a d2d transfer is over',
msgs.INFORMTRANSFERKILLEDFAILED : 'Failed to inform scheduler that transfer was killed by a signal',
msgs.RETRYTRANSFER : 'Retrying transfer',
msgs.INVALIDTIMEOUTOPTION : 'Invalid TransferManager/PendingTimeouts option, ignoring entry',
msgs.INVALIDTIMEOUTOPTION : 'Invalid DiskManager/PendingTimeouts option, ignoring entry',
msgs.ANYTRANSFERFROMSCHED : 'Invoking anyTransfersFromScheduler',
msgs.INVOKINGTRANSFERALREADYSTARTED : 'Invoking transferAlreadyStarted',
msgs.SYNCRUNTRANSFERFAILED : 'Exception caught when trying to synchronize running transfers with the database. Giving up',
......
......@@ -233,16 +233,16 @@ class LocalQueue(Queue.Queue):
'''Checks which transfers need to be canceled because they are queueing for too long'''
# get timeouts from configuration
timeouts = dict([entry.split(':') for entry in
self.config.getValue('TransferManager', 'PendingTimeouts', '').split()])
self.config.getValue('DiskManager', 'PendingTimeouts', '').split()])
for svcclass, timeout in timeouts.items():
try:
timeouts[svcclass] = int(timeout)
except ValueError:
del timeouts[svcclass]
# "Invalid TransferManager/PendingTimeouts option, ignoring entry" message
# "Invalid DiskManager/PendingTimeouts option, ignoring entry" message
dlf.writeerr(msgs.INVALIDTIMEOUTOPTION, SvcClass=svcclass, Timeout=timeout)
# get the disk to disk copy timeout
d2dtimeout = self.config.getValue('TransferManager', 'DiskCopyPendingTimeout', 7200, int)
d2dtimeout = self.config.getValue('DiskManager', 'DiskCopyPendingTimeout', 7200, int)
# get current time and diskserver status
currenttime = time.time()
# loop over the transfers
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment