Commit 377542bc authored by Sebastien Ponce
Browse files

Fixed bug #105004: timeouts on transfermanagerd replies for canceled jobs were not handled correctly

parent 037f6307
......@@ -308,10 +308,18 @@ class ServerQueue(dict):
# we are precisely in the mentioned case. We can safely return as we already think
# that the job is running on that machine
# "Transfer starting reconfirmed" message
dlf.writedebug(msgs.TRANSFERSTARTCONFIRMED, DiskServer=transfer.diskServer,
subreqId=transfer.transferId, reqId=transfer.reqId)
dlf.write(msgs.TRANSFERSTARTCONFIRMED, DiskServer=transfer.diskServer,
subreqId=transfer.transferId, reqId=transfer.reqId)
if transfer.transferType == TransferType.D2DDST:
return self.d2dsrcrunning[transfer.transferId].srcTransfer
try:
return self.d2dsrcrunning[transfer.transferId].srcTransfer
except KeyError:
# In this special case, the reconfirmation reconfirms that the job
# was canceled, not that it can run. This is made clear by the fact
# that the source has been cleaned up while it should be running
dlf.write(msgs.TRANSFERCANCELEDCONFIRMED, DiskServer=transfer.diskServer,
subreqId=transfer.transferId, reqId=transfer.reqId)
raise ValueError("Request canceled while queueing and retried due to timeout")
else:
return
# The transfer has really started somewhere else. Let the diskServer know by raising an exception
......
......@@ -48,6 +48,7 @@ msgs = dlf.enum('ABORTEREXCEPTION', 'SYNCHROFAILED', 'SYNCHROEXCEPTION',
'TRANSFERMANAGERDSTARTED', 'TRANSFERMANAGERDSTOPPED', 'NOD2DLEFTBEHIND',
'D2DSYNCFAILED', 'SYNCHROENDEDTRANSFER', 'INVOKINGGETALLRUNNINGD2DSOURCETRANSFERS',
'SYNCDBWITHD2DSRC', 'COULDNOTCONTACTTM', 'TRANSFERSTARTCONFIRMED',
'TRANSFERCANCELEDCONFIRMED',
'D2DENDEXCEPTION', 'D2DDESTRESTARTERROR', 'INVOKINGTRANSFERBACKTOQUEUE',
'TRANSFERSRCCANCELED', 'REPORTMANAGEREXCEPTION', 'INVOKINGMODIFYDISKSERVERS',
'MODIFYDISKSERVERSEXCEPTION', 'INITQUEUES', 'INITQUEUESENDED',
......@@ -113,6 +114,7 @@ dlf.addmessages({msgs.ABORTEREXCEPTION : 'Caught exception in Aborter thread',
msgs.SYNCDBWITHD2DSRC : 'Synchronizing stager DB with running d2d sources',
msgs.COULDNOTCONTACTTM : 'Could not contact transfer manager',
msgs.TRANSFERSTARTCONFIRMED : 'Transfer starting reconfirmed',
msgs.TRANSFERCANCELEDCONFIRMED : 'Transfer starting just reconfirmed was actually cancelation',
msgs.D2DENDEXCEPTION : 'Unable to end d2d as it\'s not in the server list. Probable race condition',
msgs.D2DDESTRESTARTERROR : 'Unable to put d2ddest back in queue as sources are missing. Probable race condition',
msgs.INVOKINGTRANSFERBACKTOQUEUE : 'Invoking transferBackToQueue',
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment