Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
A
ApplicationCore
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
ChimeraTK Mirror
ApplicationCore
Commits
104242a5
Commit
104242a5
authored
4 years ago
by
Martin Christoph Hierholzer
Browse files
Options
Downloads
Patches
Plain Diff
wip #129: rearrange DeviceModule exception handling loop
parent
d7360f57
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
src/DeviceModule.cc
+70
-107
70 additions, 107 deletions
src/DeviceModule.cc
with
70 additions
and
107 deletions
src/DeviceModule.cc
+
70
−
107
View file @
104242a5
...
...
@@ -277,57 +277,87 @@ namespace ChimeraTK {
Application
::
registerThread
(
"DM_"
+
getName
());
std
::
string
error
;
owner
->
testableModeLock
(
"Startup"
);
// catch-all required around everything, to prevent any uncaught exception (including the boost thread interruption)
// to terminate this thread before waking up other threads waiting on the condition variable.
try
{
while
(
!
device
.
isOpened
())
{
try
{
boost
::
this_thread
::
interruption_point
();
while
(
true
)
{
// [Spec: 2.3.1] (Re)-open the device.
owner
->
testableModeUnlock
(
"Open/recover device"
);
do
{
usleep
(
500000
);
device
.
open
();
if
(
deviceError
.
status
!=
0
)
{
deviceError
.
status
=
0
;
deviceError
.
message
=
""
;
deviceError
.
setCurrentVersionNumber
({});
deviceError
.
writeAll
();
boost
::
this_thread
::
interruption_point
();
try
{
device
.
open
();
}
}
catch
(
ChimeraTK
::
runtime_error
&
e
)
{
if
(
deviceError
.
status
!=
1
)
{
deviceError
.
status
=
1
;
deviceError
.
message
=
e
.
what
();
deviceError
.
setCurrentVersionNumber
({});
deviceError
.
writeAll
();
catch
(
ChimeraTK
::
runtime_error
&
e
)
{
if
(
deviceError
.
status
!=
1
)
{
// deviceError.status == 0 if the initial open fails (no error yet reported)
deviceError
.
status
=
1
;
deviceError
.
message
=
e
.
what
();
deviceError
.
setCurrentVersionNumber
({});
deviceError
.
writeAll
();
}
continue
;
// should not be necessary because isFunctional() should return false. But no harm in leaving it in.
}
}
while
(
!
device
.
isFunctional
());
owner
->
testableModeLock
(
"Initialise device"
);
std
::
unique_lock
<
std
::
mutex
>
errorLock
(
errorMutex
);
// [Spec: 2.3.2] Run initialisation handlers
try
{
for
(
auto
&
initHandler
:
initialisationHandlers
)
{
initHandler
(
this
);
}
}
}
catch
(
ChimeraTK
::
runtime_error
&
)
{
// Jump back to re-opening the device
continue
;
}
// The device was successfully opened, try to initialise it
try
{
for
(
auto
&
initHandler
:
initialisationHandlers
)
{
initHandler
(
this
);
// [Spec: 2.3.3] Empty exception reporting queue.
while
(
errorQueue
.
pop
())
{
if
(
owner
->
isTestableModeEnabled
())
{
assert
(
owner
->
testableMode_counter
>
0
);
--
owner
->
testableMode_counter
;
}
}
for
(
auto
&
te
:
writeRecoveryOpen
)
{
if
(
te
->
getVersionNumber
()
!=
VersionNumber
{
nullptr
})
{
te
->
write
();
// [Spec: 2.3.4] Write all recovery accessors
try
{
boost
::
unique_lock
<
boost
::
shared_mutex
>
uniqueLock
(
recoverySharedMutex
);
for
(
auto
&
te
:
writeRecoveryOpen
)
{
if
(
te
->
getVersionNumber
()
!=
VersionNumber
{
nullptr
})
{
te
->
write
();
}
}
}
}
catch
(
ChimeraTK
::
runtime_error
&
e
)
{
// we just report the exception and let the exception handling loop do the rest
std
::
lock_guard
<
std
::
mutex
>
locakGuard
(
errorMutex
);
deviceHasError
=
true
;
if
(
errorQueue
.
push
(
e
.
what
()))
{
if
(
owner
->
isTestableModeEnabled
())
++
owner
->
testableMode_counter
;
catch
(
ChimeraTK
::
runtime_error
&
)
{
// Jump back to re-opening the device
continue
;
}
}
while
(
true
)
{
// We need one interruption point at the beginning of the loop. It is always executed and is not blocked by a lock (except the testable mode lock)
boost
::
this_thread
::
interruption_point
();
// [Spec: 2.3.5] Reset exception state and wait for the next error to be reported.
deviceError
.
status
=
0
;
deviceError
.
message
=
""
;
deviceError
.
setCurrentVersionNumber
({});
deviceError
.
writeAll
();
deviceHasError
=
false
;
// [Spec: 2.3.6+2.3.7] send the trigger that the device is available again
deviceBecameFunctional
.
write
();
errorLock
.
unlock
();
errorIsResolvedCondVar
.
notify_all
();
// [Spec: 2.3.8] Wait for an exception being reported by the ExceptionHandlingDecorators
// release the testable mode mutex for waiting for the exception.
owner
->
testableModeUnlock
(
"Wait for exception"
);
// Do not modify the queue without holding the testable mode lock, because we also consitenly have to modify the counter protected by that mutex.
// Do not modify the queue without holding the testable mode lock, because we also consistently have to modify
// the counter protected by that mutex.
// Just check the condition variable.
std
::
unique_lock
<
std
::
mutex
>
errorLock
(
errorMutex
);
errorLock
.
lock
(
);
while
(
!
deviceHasError
)
{
boost
::
this_thread
::
interruption_point
();
// Make sure not to start waiting for the condition variable if
// interruption was requested.
...
...
@@ -347,82 +377,15 @@ namespace ChimeraTK {
--
owner
->
testableMode_counter
;
}
// report exception to the control system
//
[Spec: 2.6.1]
report exception to the control system
deviceError
.
status
=
1
;
deviceError
.
message
=
error
;
deviceError
.
setCurrentVersionNumber
({});
deviceError
.
writeAll
();
// wait some time until retrying.
// Never sleep when holding a lock
errorLock
.
unlock
();
// we must not hold the error lock when not having the testable mode mutex
owner
->
testableModeUnlock
(
"Wait for recovery"
);
// TODO Implementation for Spec 2.6.2 goes here.
do
{
usleep
(
500000
);
boost
::
this_thread
::
interruption_point
();
try
{
// This triggers a recovery (device is already opened).
device
.
open
();
}
catch
(
ChimeraTK
::
runtime_error
&
)
{
// Recovery failed. Just catch the exception and retry.
continue
;
// should not be necessary because isFunctional() should return false. But no harm in leaving it in.
}
}
while
(
!
device
.
isFunctional
());
owner
->
testableModeLock
(
"Try recovery"
);
errorLock
.
lock
();
// FIXME: we have to wait here until the device reports that it is functional again.
// Otherwise we spam the error reporting with 2 Hz.
try
{
// re-initialise the device before continuing
for
(
auto
&
initHandler
:
initialisationHandlers
)
{
initHandler
(
this
);
}
{
// scope for the lock guard
boost
::
unique_lock
<
boost
::
shared_mutex
>
uniqueLock
(
recoverySharedMutex
);
for
(
auto
&
te
:
writeRecoveryOpen
)
{
if
(
te
->
getVersionNumber
()
!=
VersionNumber
{
nullptr
})
{
te
->
write
();
}
}
}
// end of scope for the lock guard
}
catch
(
ChimeraTK
::
runtime_error
&
e
)
{
// Report the error. This puts the exception to the queue and we can continue with waiting for the queue.
// It is sure we will enter it again because we just pushed to it, so if the device recovers we will notice.
// Do not use reportError, which would just do the mutext business in addition to pushing to the queue, but we already have the mutex.
if
(
errorQueue
.
push
(
e
.
what
()))
{
if
(
owner
->
isTestableModeEnabled
())
++
owner
->
testableMode_counter
;
}
continue
;
}
// We survived the initialisation (if any). It seems the device is working again.
// Empty exception reporting queue.
while
(
errorQueue
.
pop
())
{
if
(
owner
->
isTestableModeEnabled
())
{
assert
(
owner
->
testableMode_counter
>
0
);
--
owner
->
testableMode_counter
;
}
}
// Reset exception state and wait for the next error to be reported.
deviceError
.
status
=
0
;
deviceError
.
message
=
""
;
deviceError
.
setCurrentVersionNumber
({});
deviceError
.
writeAll
();
deviceHasError
=
false
;
// send the trigger that the device is available again
deviceBecameFunctional
.
write
();
errorLock
.
unlock
();
errorIsResolvedCondVar
.
notify_all
();
}
}
// while(true)
}
catch
(...)
{
// before we leave this thread, we might need to notify other waiting
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment