Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
dCache
cta
Commits
24f8436a
Commit
24f8436a
authored
Jan 14, 2021
by
Cedric Caffy
Browse files
[cta-taped] Drive is put down if the CleanerSession fails to eject the tape from the drive
parent
686bc9c9
Changes
6
Hide whitespace changes
Inline
Side-by-side
tapeserver/castor/tape/tapeserver/daemon/CleanerSession.cpp
View file @
24f8436a
...
...
@@ -38,7 +38,8 @@ castor::tape::tapeserver::daemon::CleanerSession::CleanerSession(
const
bool
waitMediaInDrive
,
const
uint32_t
waitMediaInDriveTimeout
,
const
std
::
string
&
externalEncryptionKeyScript
,
cta
::
catalogue
::
Catalogue
&
catalogue
)
:
cta
::
catalogue
::
Catalogue
&
catalogue
,
cta
::
Scheduler
&
scheduler
)
:
m_capUtils
(
capUtils
),
m_mc
(
mc
),
m_log
(
log
),
...
...
@@ -48,7 +49,8 @@ castor::tape::tapeserver::daemon::CleanerSession::CleanerSession(
m_waitMediaInDrive
(
waitMediaInDrive
),
m_waitMediaInDriveTimeout
(
waitMediaInDriveTimeout
),
m_encryptionControl
(
externalEncryptionKeyScript
),
m_catalogue
(
catalogue
)
m_catalogue
(
catalogue
),
m_scheduler
(
scheduler
)
{}
//------------------------------------------------------------------------------
...
...
@@ -68,15 +70,49 @@ castor::tape::tapeserver::daemon::Session::EndOfSessionAction
errorMessage
=
"Caught an unknown exception"
;
}
//
Reaching this point means the cleaner failed and an exception was thrown
//Reaching this point means the cleaner failed and an exception was thrown
std
::
list
<
cta
::
log
::
Param
>
params
=
{
cta
::
log
::
Param
(
"tapeVid"
,
m_vid
),
cta
::
log
::
Param
(
"tapeDrive"
,
m_driveConfig
.
unitName
),
cta
::
log
::
Param
(
"message"
,
errorMessage
)};
m_log
(
cta
::
log
::
ERR
,
"Cleaner failed"
,
params
);
m_log
(
cta
::
log
::
ERR
,
"Cleaner failed. Putting the drive down."
,
params
);
//Putting the drive down
try
{
setDriveDownAfterCleanerFailed
(
std
::
string
(
"Cleaner failed. "
)
+
errorMessage
);
}
catch
(
const
cta
::
exception
::
Exception
&
ex
)
{
std
::
list
<
cta
::
log
::
Param
>
params
=
{
cta
::
log
::
Param
(
"tapeVid"
,
m_vid
),
cta
::
log
::
Param
(
"tapeDrive"
,
m_driveConfig
.
unitName
),
cta
::
log
::
Param
(
"message"
,
ex
.
getMessageValue
())};
m_log
(
cta
::
log
::
ERR
,
"Cleaner failed. Failed to put the drive down"
,
params
);
}
return
MARK_DRIVE_AS_DOWN
;
}
void
castor
::
tape
::
tapeserver
::
daemon
::
CleanerSession
::
setDriveDownAfterCleanerFailed
(
const
std
::
string
&
errorMsg
)
{
std
::
string
logicalLibrary
=
m_driveConfig
.
logicalLibrary
;
std
::
string
hostname
=
cta
::
utils
::
getShortHostname
();
std
::
string
driveName
=
m_driveConfig
.
unitName
;
cta
::
common
::
dataStructures
::
DriveInfo
driveInfo
;
driveInfo
.
driveName
=
driveName
;
driveInfo
.
logicalLibrary
=
logicalLibrary
;
driveInfo
.
host
=
hostname
;
cta
::
log
::
LogContext
lc
(
m_log
);
m_scheduler
.
reportDriveStatus
(
driveInfo
,
cta
::
common
::
dataStructures
::
MountType
::
NoMount
,
cta
::
common
::
dataStructures
::
DriveStatus
::
Down
,
lc
);
cta
::
common
::
dataStructures
::
SecurityIdentity
cliId
;
cta
::
common
::
dataStructures
::
DesiredDriveState
driveState
;
driveState
.
up
=
false
;
driveState
.
forceDown
=
false
;
driveState
.
setReasonFromLogMsg
(
cta
::
log
::
ERR
,
errorMsg
);
m_scheduler
.
setDesiredDriveState
(
cliId
,
m_driveConfig
.
unitName
,
driveState
,
lc
);
}
//------------------------------------------------------------------------------
// exceptionThrowingExecute
//------------------------------------------------------------------------------
...
...
tapeserver/castor/tape/tapeserver/daemon/CleanerSession.hpp
View file @
24f8436a
...
...
@@ -34,6 +34,7 @@
#include
"tapeserver/castor/tape/tapeserver/SCSI/Device.hpp"
#include
"tapeserver/castor/tape/tapeserver/daemon/EncryptionControl.hpp"
#include
"catalogue/Catalogue.hpp"
#include
"scheduler/Scheduler.hpp"
#include
<memory>
...
...
@@ -75,7 +76,8 @@ namespace daemon {
const
bool
waitMediaInDrive
,
const
uint32_t
waitMediaInDriveTimeout
,
const
std
::
string
&
externalEncryptionKeyScript
,
cta
::
catalogue
::
Catalogue
&
catalogue
);
cta
::
catalogue
::
Catalogue
&
catalogue
,
cta
::
Scheduler
&
scheduler
);
/**
* Execute the session and return the type of action to be performed
...
...
@@ -141,6 +143,11 @@ namespace daemon {
*/
cta
::
catalogue
::
Catalogue
&
m_catalogue
;
/**
* CTA scheduler
*/
cta
::
Scheduler
&
m_scheduler
;
/**
* Variable used to log UPDATE_USER_NAME in the DB
*/
...
...
@@ -232,6 +239,11 @@ namespace daemon {
*/
void
dismountTape
(
const
std
::
string
&
vid
);
/**
* Put the drive down in case the Cleaner has failed
*/
void
setDriveDownAfterCleanerFailed
(
const
std
::
string
&
errorMsg
);
};
// class CleanerSession
}
// namespace daemon
...
...
tapeserver/castor/tape/tapeserver/daemon/DataTransferSession.cpp
View file @
24f8436a
...
...
@@ -147,7 +147,7 @@ schedule:
cta
::
common
::
dataStructures
::
DesiredDriveState
driveState
;
driveState
.
up
=
false
;
driveState
.
forceDown
=
false
;
std
::
string
errorMsg
=
"A tape was detected in the drive. Putting the drive
back
down."
;
std
::
string
errorMsg
=
"A tape was detected in the drive. Putting the drive down."
;
int
logLevel
=
cta
::
log
::
ERR
;
driveState
.
setReasonFromLogMsg
(
logLevel
,
errorMsg
);
m_scheduler
.
setDesiredDriveState
(
securityIdentity
,
m_driveConfig
.
unitName
,
driveState
,
lc
);
...
...
tapeserver/castor/tape/tapeserver/daemon/DataTransferSessionTest.cpp
View file @
24f8436a
...
...
@@ -56,6 +56,7 @@
#include
"scheduler/testingMocks/MockArchiveMount.hpp"
#include
"tests/TempFile.hpp"
#include
"objectstore/BackendRadosTestSwitch.hpp"
#include
"CleanerSession.hpp"
#include
<dirent.h>
#include
<fcntl.h>
...
...
@@ -2589,6 +2590,119 @@ TEST_P(DataTransferSessionTest, DataTransferSessionTapeFullOnFlushMigration) {
"mountTotalReadRetries=
\"
25
\"
mountTotalWriteRetries=
\"
25
\"
mountWriteTransients=
\"
10
\"
"
));
}
/**
 * Checks that a CleanerSession which fails puts the drive down.
 *
 * The test creates a logical library and an unlabelled tape in the catalogue,
 * registers the drive in the scheduler and sets its desired state to up, then
 * runs a CleanerSession on it. The session is expected to fail, after which
 * the desired drive state read back from the scheduler must no longer be up
 * and execute() must have returned MARK_DRIVE_AS_DOWN.
 */
TEST_P(DataTransferSessionTest, CleanerSessionFailsShouldPutTheDriveDown) {
  // Logger used by the scheduler calls and by the cleaner session itself
  cta::log::StringLogger logger("dummy", "tapeServerUnitTest", cta::log::DEBUG);
  cta::log::LogContext logContext(logger);

  setupDefaultCatalogue();

  // System wrapper delegating to a fake virtual drive
  castor::tape::System::mockWrapper mockSys;
  mockSys.delegateToFake();
  mockSys.disableGMockCallsCounting();
  mockSys.fake.setupForVirtualDriveSLC6();

  // Catalogue and scheduler provided by the test fixture
  auto & catalogue = getCatalogue();
  auto & scheduler = getScheduler();

  // Always use the same requester
  const cta::common::dataStructures::SecurityIdentity requester("user", "group");

  // Create the logical library and check it was stored as requested
  const std::string libraryComment = "Library comment";
  const bool libraryIsDisabled = false;
  catalogue.createLogicalLibrary(s_adminOnAdminHost, s_libraryName,
    libraryIsDisabled, libraryComment);
  {
    auto libraries = catalogue.getLogicalLibraries();
    ASSERT_EQ(1, libraries.size());
    ASSERT_EQ(s_libraryName, libraries.front().name);
    ASSERT_EQ(libraryComment, libraries.front().comment);
  }

  // Create the tape in that library
  const std::string tapeComment = "Tape comment";
  bool notDisabled = false;
  bool notFull = false;
  bool notReadOnly = false;
  {
    cta::catalogue::CreateTapeAttributes tape;
    tape.vid = s_vid;
    tape.mediaType = s_mediaType;
    tape.vendor = s_vendor;
    tape.logicalLibraryName = s_libraryName;
    tape.tapePoolName = s_tapePoolName;
    tape.full = notFull;
    tape.disabled = notDisabled;
    tape.readOnly = notReadOnly;
    tape.comment = tapeComment;
    catalogue.createTape(s_adminOnAdminHost, tape);
  }

  // Create the mount criteria
  catalogue.createMountPolicy(requester, "immediateMount", 1000, 0, 1000, 0, 1, "Policy comment");
  catalogue.createRequesterMountRule(requester, "immediateMount", s_diskInstance, requester.username, "Rule comment");

  // No delete is needed for the fake drive: the pointer is handed over with
  // its ownership to the application, which will do the delete.
  const uint64_t tapeSize = 5000;
  mockSys.fake.m_pathToDrive["/dev/nst0"] = new castor::tape::tapeserver::drive::FakeDrive(tapeSize,
    castor::tape::tapeserver::drive::FakeDrive::OnFlush);

  // Report the drive's existence and put it up in the drive register.
  cta::tape::daemon::TpconfigLine driveConfig("T10D6116", "TestLogicalLibrary", "/dev/tape_T10D6116", "manual");
  cta::common::dataStructures::DriveInfo driveInfo;
  driveInfo.driveName = driveConfig.unitName;
  driveInfo.logicalLibrary = driveConfig.logicalLibrary;
  // Assignment, not comparison: "driveInfo.host == ..." was a no-op that left
  // the host unset when the drive was reported to the scheduler.
  driveInfo.host = "host";
  // We need to create the drive in the registry before being able to put it up.
  scheduler.reportDriveStatus(driveInfo, cta::common::dataStructures::MountType::NoMount,
    cta::common::dataStructures::DriveStatus::Down, logContext);
  cta::common::dataStructures::DesiredDriveState driveState;
  driveState.up = true;
  driveState.forceDown = false;
  scheduler.setDesiredDriveState(s_adminOnAdminHost, driveConfig.unitName, driveState, logContext);

  // NOTE(review): castorConf and initialProcess are not passed to the
  // CleanerSession below; presumably left over from the data transfer tests -
  // confirm before removing them.
  DataTransferConfig castorConf;
  castorConf.bufsz = 1024 * 1024; // 1 MB memory buffers
  castorConf.nbBufs = 10;
  castorConf.bulkRequestRecallMaxBytes = UINT64_C(100) * 1000 * 1000 * 1000;
  castorConf.bulkRequestRecallMaxFiles = 1000;
  castorConf.bulkRequestMigrationMaxBytes = UINT64_C(100) * 1000 * 1000 * 1000;
  castorConf.bulkRequestMigrationMaxFiles = 1000;
  castorConf.nbDiskThreads = 1;
  cta::log::DummyLogger dummyLog("dummy", "dummy");
  cta::mediachanger::MediaChangerFacade mc(dummyLog);
  cta::server::ProcessCapDummy capUtils;
  castor::messages::TapeserverProxyDummy initialProcess;

  // Create the cleaner session (no wait for media in the drive, no
  // encryption key script) and run it
  CleanerSession cleanerSession(
    capUtils,
    mc,
    logger,
    driveConfig,
    mockSys,
    s_vid,
    false,
    0,
    "",
    catalogue,
    scheduler
  );
  auto endOfSessionAction = cleanerSession.execute();

  // The tape has not been labeled so the cleanerSession should have failed and put the drive down.
  cta::common::dataStructures::DesiredDriveState newDriveState = scheduler.getDesiredDriveState(driveConfig.unitName, logContext);
  ASSERT_FALSE(newDriveState.up);
  ASSERT_EQ(castor::tape::tapeserver::daemon::Session::MARK_DRIVE_AS_DOWN, endOfSessionAction);
}
#undef TEST_MOCK_DB
#ifdef TEST_MOCK_DB
static
cta
::
MockSchedulerDatabaseFactory
mockDbFactory
;
...
...
tapeserver/daemon/DriveHandler.cpp
View file @
24f8436a
...
...
@@ -1035,6 +1035,21 @@ int DriveHandler::runChild() {
// sleep(1);
// return castor::tape::tapeserver::daemon::Session::MARK_DRIVE_AS_DOWN;
// }
try
{
scheduler
.
ping
(
lc
);
}
catch
(
const
cta
::
catalogue
::
WrongSchemaVersionException
&
ex
)
{
log
::
ScopedParamContainer
param
(
lc
);
param
.
add
(
"errorMessage"
,
ex
.
getMessageValue
());
lc
.
log
(
log
::
CRIT
,
"In DriveHandler::runChild() before cleanerSession: catalogue MAJOR version mismatch. Reporting fatal error."
);
driveHandlerProxy
.
reportState
(
tape
::
session
::
SessionState
::
Fatal
,
tape
::
session
::
SessionType
::
Undetermined
,
""
);
return
castor
::
tape
::
tapeserver
::
daemon
::
Session
::
MARK_DRIVE_AS_DOWN
;
}
catch
(
cta
::
exception
::
Exception
&
ex
)
{
log
::
ScopedParamContainer
param
(
lc
);
param
.
add
(
"errorMessage"
,
ex
.
getMessageValue
());
lc
.
log
(
log
::
CRIT
,
"In DriveHandler::runChild() before cleanerSession: failed to ping central storage before session. Reporting fatal error."
);
driveHandlerProxy
.
reportState
(
tape
::
session
::
SessionState
::
Fatal
,
tape
::
session
::
SessionType
::
Undetermined
,
""
);
return
castor
::
tape
::
tapeserver
::
daemon
::
Session
::
MARK_DRIVE_AS_DOWN
;
}
castor
::
tape
::
tapeserver
::
daemon
::
CleanerSession
cleanerSession
(
capUtils
,
...
...
@@ -1046,7 +1061,8 @@ int DriveHandler::runChild() {
true
,
60
,
""
,
*
m_catalogue
);
*
m_catalogue
,
scheduler
);
return
cleanerSession
.
execute
();
}
else
{
// The next session will be a normal session (no crash with a mounted tape before).
...
...
@@ -1157,29 +1173,68 @@ int DriveHandler::runChild() {
//------------------------------------------------------------------------------
SubprocessHandler
::
ProcessingStatus
DriveHandler
::
shutdown
()
{
// TODO: improve in the future (preempt the child process)
log
::
ScopedParamContainer
params
(
m_processManager
.
logContext
());
auto
&
lc
=
m_processManager
.
logContext
();
log
::
ScopedParamContainer
params
(
lc
);
params
.
add
(
"tapeDrive"
,
m_configLine
.
unitName
);
m_processManager
.
logContext
()
.
log
(
log
::
INFO
,
"In DriveHandler::shutdown(): simply killing the process."
);
lc
.
log
(
log
::
INFO
,
"In DriveHandler::shutdown(): simply killing the process."
);
kill
();
std
::
set
<
SessionState
>
statesRequiringCleaner
=
{
SessionState
::
Mounting
,
SessionState
::
Running
,
SessionState
::
Unmounting
};
if
(
statesRequiringCleaner
.
count
(
m_sessionState
))
{
if
(
!
m_sessionVid
.
size
())
{
m_processManager
.
logContext
()
.
log
(
log
::
ERR
,
"In DriveHandler::shutdown(): Should run cleaner but VID is missing. Do
not
nothing."
);
lc
.
log
(
log
::
ERR
,
"In DriveHandler::shutdown(): Should run cleaner but VID is missing. Do nothing."
);
}
else
{
log
::
ScopedParamContainer
params
(
m_processManager
.
logContext
());
params
.
add
(
"tapeVid"
,
m_sessionVid
)
.
add
(
"tapeDrive"
,
m_configLine
.
unitName
)
.
add
(
"sessionState"
,
session
::
toString
(
m_sessionState
))
.
add
(
"sessionType"
,
session
::
toString
(
m_sessionType
));
m_processManager
.
logContext
()
.
log
(
log
::
INFO
,
"In DriveHandler::shutdown(): starting cleaner."
);
lc
.
log
(
log
::
INFO
,
"In DriveHandler::shutdown(): starting cleaner."
);
// Capabilities management.
cta
::
server
::
ProcessCap
capUtils
;
// Mounting management.
if
(
!
m_catalogue
)
m_catalogue
=
createCatalogue
(
"DriveHandler::shutdown()"
);
//Create the scheduler
//Create the backend
std
::
unique_ptr
<
cta
::
objectstore
::
Backend
>
backend
;
try
{
backend
.
reset
(
cta
::
objectstore
::
BackendFactory
::
createBackend
(
m_tapedConfig
.
backendPath
.
value
(),
lc
.
logger
()).
release
());
}
catch
(
cta
::
exception
::
Exception
&
ex
)
{
log
::
ScopedParamContainer
param
(
lc
);
param
.
add
(
"errorMessage"
,
ex
.
getMessageValue
());
lc
.
log
(
log
::
CRIT
,
"In DriveHandler::shutdown(): failed to connect to objectstore."
);
goto
exitShutdown
;
}
// If the backend is a VFS, make sure we don't delete it on exit.
// If not, nevermind.
try
{
dynamic_cast
<
cta
::
objectstore
::
BackendVFS
&>
(
*
backend
).
noDeleteOnExit
();
}
catch
(
std
::
bad_cast
&
){}
// Create the agent entry in the object store. This could fail (even before ping, so
// handle failure like a ping failure).
std
::
unique_ptr
<
cta
::
objectstore
::
BackendPopulator
>
backendPopulator
;
std
::
unique_ptr
<
cta
::
OStoreDBWithAgent
>
osdb
;
try
{
std
::
string
processName
=
"DriveHandlerShutdown-"
;
processName
+=
m_configLine
.
unitName
;
log
::
ScopedParamContainer
params
(
lc
);
params
.
add
(
"processName"
,
processName
);
lc
.
log
(
log
::
DEBUG
,
"In DriveHandler::shutdown(): will create agent entry. Enabling leaving non-empty agent behind."
);
backendPopulator
.
reset
(
new
cta
::
objectstore
::
BackendPopulator
(
*
backend
,
processName
,
lc
));
}
catch
(
cta
::
exception
::
Exception
&
ex
)
{
log
::
ScopedParamContainer
param
(
lc
);
param
.
add
(
"errorMessage"
,
ex
.
getMessageValue
());
lc
.
log
(
log
::
CRIT
,
"In DriveHandler::shutdown(): failed to instantiate agent entry. Reporting fatal error."
);
goto
exitShutdown
;
}
osdb
.
reset
(
new
cta
::
OStoreDBWithAgent
(
*
backend
,
backendPopulator
->
getAgentReference
(),
*
m_catalogue
,
lc
.
logger
()));
lc
.
log
(
log
::
DEBUG
,
"In DriveHandler::shutdown(): will create scheduler."
);
std
::
unique_ptr
<
cta
::
Scheduler
>
scheduler
(
new
Scheduler
(
*
m_catalogue
,
*
osdb
,
0
,
0
));
cta
::
mediachanger
::
MediaChangerFacade
mediaChangerFacade
(
m_processManager
.
logContext
().
logger
());
castor
::
tape
::
System
::
realWrapper
sWrapper
;
castor
::
tape
::
tapeserver
::
daemon
::
CleanerSession
cleanerSession
(
...
...
@@ -1192,18 +1247,21 @@ SubprocessHandler::ProcessingStatus DriveHandler::shutdown() {
true
,
60
,
""
,
*
m_catalogue
);
*
m_catalogue
,
*
scheduler
);
cleanerSession
.
execute
();
}
}
m_sessionState
=
SessionState
::
Shutdown
;
m_processingStatus
.
nextTimeout
=
m_processingStatus
.
nextTimeout
.
max
();
m_processingStatus
.
forkRequested
=
false
;
m_processingStatus
.
killRequested
=
false
;
m_processingStatus
.
shutdownComplete
=
true
;
m_processingStatus
.
sigChild
=
false
;
return
m_processingStatus
;
exitShutdown:
m_sessionState
=
SessionState
::
Shutdown
;
m_processingStatus
.
nextTimeout
=
m_processingStatus
.
nextTimeout
.
max
();
m_processingStatus
.
forkRequested
=
false
;
m_processingStatus
.
killRequested
=
false
;
m_processingStatus
.
shutdownComplete
=
true
;
m_processingStatus
.
sigChild
=
false
;
return
m_processingStatus
;
}
std
::
unique_ptr
<
cta
::
catalogue
::
Catalogue
>
DriveHandler
::
createCatalogue
(
const
std
::
string
&
methodCaller
){
...
...
tapeserver/daemon/DriveHandler.hpp
View file @
24f8436a
...
...
@@ -26,6 +26,7 @@
#include
"tapeserver/session/SessionState.hpp"
#include
"tapeserver/session/SessionType.hpp"
#include
"catalogue/Catalogue.hpp"
#include
"scheduler/Scheduler.hpp"
#include
<memory>
namespace
cta
{
namespace
tape
{
namespace
daemon
{
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment