diff --git a/Modules/include/Logging.h b/Modules/include/Logging.h index 73416dc392069d51ec87c1c60e7f91afd2eb3196..ff3bf730ea972bda129e9636b7e20150ea0254d8 100644 --- a/Modules/include/Logging.h +++ b/Modules/include/Logging.h @@ -189,12 +189,17 @@ namespace logging { private: ctk::VariableNetworkNode getAccessorPair(const std::string& sender); - /** Map key is the feeding module */ - std::map<std::string, ctk::ScalarPushInput<std::string> > msg_list; - - /** Find the Message that was updated. - */ - std::map<std::string, ctk::ScalarPushInput<std::string> >::iterator FindSender(const ChimeraTK::TransferElementID& id); + struct MessageSource{ + ctk::ScalarPushInput<std::string> msg; + std::string sendingModule; + MessageSource(const std::string &moduleName, Module* module): + msg{module, moduleName + "Msg", "", ""},sendingModule(moduleName){}; + }; + /** List of senders. */ + std::vector<MessageSource > sources; + + /** Map key is the transfer id of the ScalarPushInput variable pointed to */ + std::map<ChimeraTK::TransferElementID, MessageSource* > id_list; /** Number of messages stored in the tail */ size_t messageCounter; diff --git a/Modules/src/Logging.cc b/Modules/src/Logging.cc index 39070f3cba05acf898c341dbdc73c0f81d983a65..0763c2a859d55a15a0edf2f6c41350a59c5c8ab9 100644 --- a/Modules/src/Logging.cc +++ b/Modules/src/Logging.cc @@ -100,25 +100,44 @@ void LoggingModule::mainLoop() { file.reset(new std::ofstream()); messageCounter = 0; std::stringstream greeter; - greeter << getName() << " " << getTime() << "There are " << msg_list.size() + greeter << getName() << " " << getTime() << "There are " << sources.size() << " modules registered for logging:" << std::endl; broadcastMessage(greeter.str()); - for(auto& module : msg_list) { - broadcastMessage(std::string("\t - ") + module.first); + for(auto module = sources.begin(); module != sources.end();module++) { + broadcastMessage(std::string("\t - ") + module->sendingModule); + id_list[module->msg.getId()] = 
&(*module); } auto group = readAnyGroup(); + while(1) { auto id = group.readAny(); - auto sender = FindSender(id); + if(id_list.count(id) == 0){ + throw ChimeraTK::logic_error("Cannot find element id" + "when updating logging variables."); + } + std::string msg; + MessageSource* currentSender; + LogLevel level; + try{ + currentSender = id_list.at(id); + msg = (std::string)(currentSender->msg); + } catch (std::out_of_range &e){ + throw ChimeraTK::logic_error("Cannot find element id" + "when updating logging variables."); + } + try{ + level = static_cast<LogLevel>(std::strtoul(&msg.at(0),NULL, 0)); + } catch (std::out_of_range &e){ + throw ChimeraTK::logic_error("Cannot find message level" + "when updating logging variables."); + } if(targetStream == 3) continue; - auto strlevel = ((std::string)(sender->second))[0]; - LogLevel level = static_cast<LogLevel>(std::strtoul(&strlevel,NULL, 0)); LogLevel setLevel = static_cast<LogLevel>((uint)logLevel); - std::string tmpStr = (std::string)(sender->second); + std::string tmpStr = msg; // remove message level tmpStr = tmpStr.substr(1,tmpStr.size()); std::stringstream ss; - ss << level << getName() << "/" << sender->first << " " << getTime() << tmpStr; + ss << level << getName() << "/" << currentSender->sendingModule << " " << getTime() << tmpStr; if(targetStream == 0 || targetStream == 1) { if(!((std::string)logFile).empty() && !file->is_open()) { std::stringstream ss_file; @@ -151,24 +170,16 @@ void LoggingModule::addSource(boost::shared_ptr<Logger> logger) { ctk::VariableNetworkNode LoggingModule::getAccessorPair( const std::string& sender) { - if(msg_list.count(sender) == 0) { - msg_list.emplace(std::piecewise_construct, std::make_tuple(sender), - std::make_tuple(ctk::ScalarPushInput<std::string>{this, sender + "Msg", "", ""})); + auto it = std::find_if(sources.begin(), sources.end(), + boost::bind(&MessageSource::sendingModule, _1) == sender); + if(it == sources.end()) { + 
sources.emplace_back(MessageSource{sender,this}); } else { throw ChimeraTK::logic_error( "Cannot add logging for module " + sender + " since logging was already added for this module."); } - return msg_list[sender]; -} - -std::map<std::string, ctk::ScalarPushInput<std::string> >::iterator LoggingModule::FindSender(const ChimeraTK::TransferElementID& id) { - for(auto it = msg_list.begin(), iend = msg_list.end(); it != iend; it++) { - if(it->second.getId() == id) - return it; - } - throw ChimeraTK::logic_error("Cannot find element id" - "when updating logging variables."); + return sources.back().msg; } void LoggingModule::terminate() { diff --git a/doc/exceptionHandlingDesign.dox b/doc/exceptionHandlingDesign.dox index c060e83ad51c6ffa350c3b9a85df4ad796774047..64c686c38ee9f04dd449724ad3d4949df3543d40 100644 --- a/doc/exceptionHandlingDesign.dox +++ b/doc/exceptionHandlingDesign.dox @@ -1,182 +1,211 @@ /** -\page exceptionHandlingDesign Exception Handling Design -\section gen_idea General Idea - - -Exceptions must be handled by ApplicationCore in a way that the application developer does not have to care much about it. - -In case of a ChimeraTK::runtime_error exception the Application must catch the expection and report it to the DeviceModule. The DeviceModule should handle this exception and block the device until the device can be opened again. As there could many devices make sure only the faulty device is blocked. -Even if a device is faulty it should not block the server from starting. - -Once in error state, set the DataValidity flag for that module to faulty and propogate this to all of it‘s output variables. After the exception is cleared and operation returns without a data fault flag, set DataValidity flag to ok. Furthermore, the device must be reinitialised automatically and also recover the values of process variables as the device might have rebooted and the variables have been re-set. - - --<b>1. 
Genesis</b> - -- a) When DeviceModule is created it is registered with Application. (Added to a list in Application::registerDeviceModule.) -- b) An initailisation handler can be added to the device through constructor. Initialisation handlers are callback function which will be executed after a device recovers from an exception. -- c) A list of TransferElements shared pointers is created as writeAfterOpen which is used to write constants after the devcie is opened. -- d) A list of TransferElements shared pointers is created as writeRecoveryOpen which is populated in function addRecoveryAccessor in the DeviceModule. -- e) ChimeraTK::NDRegisterAccessor is used to access the device variables inside class Application. -- f) Class ExceptionHandlingDecorator facilitates ChimeraTK::NDRegisterAccessor in case of exception -- g) An ExceptionHandlingDecorator is placed around all NDRegisterAccessors coming from a device. -- h) Recovery accessor is added for writebale register when ChimeraTK::NDRegisterAccessor is obtianed. These recovery accessors are used to recover the values of variables after the recovery. -- i) setOnwer() is used to set the application module or variable group as owner of the (feeding) device which is decorated with and ExceptionHandlingDecorator. +\page exceptionHandlingDesign Technical specification: Exception Handling Design --<b>2. The Flow</b> +<b>DRAFT VERSION, WRITE-UP IN PROGRESS!</b> -- 2.1. Application always starts with all devices as closed. - -- 2.2. Until the device is opened all the read and writes to this device will be delayed. - -- 2.3. The device is opened for the first time inside DeviceModule::handleException() in an separate asynchronous thread. - -- 2.4. If there is no exception +\section gen_idea General Idea - - 2.4.1. deviceError.status is set to 0. +Exceptions must be handled by ApplicationCore in a way that the application developer does not have to care much about it. - - 2.4.2. 
Device is initailised iterating over initialisationHandlers list. +In case of a ChimeraTK::runtime_error exception the framework must catch the exception and report it to the DeviceModule. The DeviceModule handles this exception and periodically tries to open the device. In case of several devices only the faulty device is blocked. Even if a device is faulty it should not block the server from starting. - - 2.4.3. Constant feeders are written to the device using writeAfterOpen(). +If an input variable is in the error state, it sets the DataValidity flag for its DataValidityPropagationExecutor (see \link spec_dataValidityPropagation \endlink) to faulty and the flag is propagated appropriately. After the exception is cleared and operation returns without a data fault flag, set DataValidity flag to ok. Furthermore, the device must be reinitialised automatically and also recover the values of process variables as the device might have rebooted and the variables have been re-set. -- 2.5. When a read / write operation on device causes a ChimeraTK::runtime_error exception, the exception is caught. See g. - - 2.5.1. Inside ExceptionHandlingDecorator +<b>1. Genesis</b> - - 2.5.1.1. The dataValidity of the DeviceModule is set to faulty using setOwnerValidityFunction(DataValidity::faulty) +- a (removed) +- b. An initialisation handler can be added to the DeviceModule in the user code. Initialisation handlers are callback functions which will be executed when a device is opened for the first time and after a device recovers from an exception, before any process variables are written. +- c. Initial values must be correctly propagated after a device is opened. See \link spec_initialValuePropagation \endlink. Especially, no read function (even readNonBlocking/readLatest) must return before an initial value has been received. +- d. (removed) +- e. 
A ChimeraTK::ExceptionHandlingDecorator is placed around all ChimeraTK::NDRegisterAccessors which connect a device to a ChimeraTK::ApplicationModule or fanout. (*) +- f. (removed) +- g. By default a recovery accessor is added for each device register when it is obtained. These recovery accessors are used to correctly set the values of variables when the device is opened for the first time and after a device is recovered from an exception. (*) +- h. A ChimeraTK::ExceptionHandlingDecorator for an input knows its DataValidityPropagationExecutor, which lives in the ApplicationModule or fanout that reads the input. Like this it can propagate the + dataValidity flag. Outputs do not send DataValidity faulty in case of exceptions (see \link spec_dataValidityPropagation \endlink). +- i. Write should not block in case of an exception for the outputs of ThreadedFanOut / TriggerFanOut. (*) - - 2.5.1.2. incrementDataFaultCounter(true) is called. +<b>2. The Flow</b> - - 2.5.1.3. Error is reported to DeviceModule with the exception as DeviceModule::reportException(e.what). +- 2.1. The application always starts with all devices as closed and initial value for deviceError.status is set to 1. The DeviceModule takes care that ExceptionHandlingDecorators do not perform any read or write operations, but block. This must happen before running any prepare() of an ApplicationModule, where the first write calls to ExceptionHandlingDecorators are done. - - 2.5.1.4. incrementDataFaultCounter is picked up by MetaDataPropagatingDecorator and all the outputs are set faulty. +- 2.2 In ApplicationModule::prepare() some initial values (and constants) are written. As the ExceptionHandlingDecorator must not perform the actual write at this point, it will put the value into the dataRecoveryAccesssor and report an exception to the DeviceModule. 
+ - 2.2.3 Although ApplicationModule and fanout threads start after the device module threads, the application is now asynchronous and read or write operations can already take place in the main loops, even if the device is not ready yet (it might actually be broken). All read and write operations are blocked by the exceptionHandlingDecorators at this point. - - 2.5.2. In DeviceModule::reportException +- 2.3 The device module thread starts. + - 2.3.1 The DeviceModule tries to open the device until it succeeds.(*) + - 2.3.2 Device is initialised by iterating initialisationHandlers list. If there is an exception go back to 2.2.1. (*) + - 2.3.3 The list of reported exceptions is cleared. (*) + - 2.3.4 All valid (*) recovery accessors are written. If there is an exception go back to 2.3.1. (*) + - 2.3.5 deviceError.status is set to 0. + - 2.3.6 DeviceModule allows that ExceptionHandlingDecorators execute reads and writes. + - 2.3.7 All blocked read and write operations (from 2.5.3) are notified.(*) + - 2.3.8 The DeviceModuleThread waits for the next reported exception. - - 2.5.2.1. The Error is pushed into an error queue and the deviceError.status is set to 1. +- 2.4 Device and Application are running normally + - 2.4.1 All blocked ExceptionHandlingDecorators continue (*) + - 2.4.1.1 write just continues (recovery accessor has done the write) + - 2.4.1.2 read/readNonBlocking/readLatest + - 2.4.1.2.1 tells the DataValidityPropagationExecutor that the device error has gone + - 2.4.1.2.2 (re-)tries to get the value. In case of an exception go to 2.5 + - 2.4.2 In the ExceptionHandlingDecorator, all write calls always fill the value into the recovery accessors before trying to execute the real write. Like this, the recovery accessor always has the last value that should have been written to the device. All recovery accessors become valid over time (see comment for 2.3.4). 
+ - 2.4.2.1 If a write is not executed because the device is already faulty (from 2.2 or 2.6.1), the recovery accessor has to take care of this. In this case we always have to send another exception notification to the DeviceModule to make sure that the new recovery value is not missed (avoid race condition). (*) - - 2.5.2.2. The caller is blocked until the error state is resolved i.e., device can be opened again. - - 2.5.3. Exception is handled by DeviceModule::handleException() in a separate thread. +- 2.5. When a read / write operation on the device (1.e) causes a ChimeraTK::runtime_error exception, the exception is caught in the ExceptionHandlingDecorator + - 2.5.1. If it is a read operation the DataValidityPropagationExecutor is informed that there was a device error. (*) + - 2.5.2. The error is reported to the DeviceModule + - 2.5.3. Action depending on the calling operation : + - write : blocks until the device is recovered. + - read : If the accessor has aleady seen its initial value, the first "blocking" read call returns immediately (remember DataValidity is set to faulty). The ExceptionHandlingDecorator remembers that it is in an exception state. The calling module thread will continue and propagate the data invalid flag. The second call will finally block. If there has not been an initial value yet, even the first call will block until it is available. + - readNonBlocking / readLatest: will always return with data invalid flag (unless there has not been an initial value yet). + - writeWithoutErrorBlocking: just returns (*) - - 2.5.4. It will keep on trying to open the device until successful. +- 2.6 The exception is received in the DeviceModule thread + - 2.6.1 deviceError.status will be set to 1. From this point on, all ExceptionHandlingDecorators for this device must block all read and write operations (see also 2.2 and 2.3.6). + - 2.6.2 The thread goes back to 2.2.1 and tries to re-open the device. - - 2.5.5. Once device is opened, - - 2.5.5.1. 
deviceError.status is set to 0. +<b>3. (*) Comments</b> - - 2.5.5.2. device is reinitalisied through initialisationHandlers. +- 1.e. In addition there can be recovery accessors for the same variables, which are not decorated. They are not directly seen by the ApplicationModule and the fanouts. +- 1.g. Output accessors can have the option not to have a recovery accessor. This is needed for instance for "trigger registers" which start an operation on the hardware. Also void registers don't have recovery accessors. +- 1.i. The specification for initial value propagation (\link spec_initialValuePropagation \endlink) also says that writes in ApplicationModules don't block before the first successful read in the main loop. - - 2.5.5.3. process variables are written again through writeRecoveryOpen(). +- 2.3.1 Successful opening includes that the device reports isFunctional() as true. +- 2.3.2 and 2.3.4 Exceptions for re-initialisation and recovery will be reported once, but not if it occurs again before the device has completely recovered. +- 2.3.3 ExceptionHandlingDecorators must always first write the recovery accessor, then report an exception. As the device module clears the exceptions first, then processes the accessors, it is guaranteed that no value is missed. As a side effect it can be that a pending exception triggers an unnecessary recovery loop in the device module. +- 2.3.4 If a recovery accessor has not seen an initial value yet, it will not be written (see \link spec_initialValuePropagation \endlink). +- 2.3.7 This is different from 2.2.6 because 2.2.6 affects accessors which want to perform a read or write, while 2.2.7 affects accessors that failed to do so and are waiting for the device to become available again. This is needed for two cases: + - 1. A blocking write, where the recovery accessor has already done the job when the device is back to OK. + - 2. 
The first blocking read if the data has not seen the initial value yet, and retrieving it caused the exception. +- 2.4.1 writeWithoutErrorBlocking is not mentioned because it never blocks. Although blocked by different mechanisms read/readNonBlocking/readLatest behave the same: + - read is either the second read call which is expected to deliver the next value, or any of the three are still waiting for the initial value. In any case they have to (re-)try reading. +- 2.4.2.1 Basically after each update of the recovery accessor there has to be a valid write, or an exception has to be reported to the DeviceModule, to make sure the value is seen by the device (unless the recovery accessor is updated before this happens). +- 2.5.1 incrementDataInvalidCounter() is called. See \link spec_dataValidityPropagation \endlink. - - 2.5.5.4. caller thread is notified and it no longer remains blocked. (from 2.5.2.2) +<b>Implementation Details</b> --<b>3. Add an exception handling and reporting machinsm to the device module (DeviceModule).</b> +<b>4. Exception handling and reporting mechanism to the device module (DeviceModule).</b> Description. -Add two error state variables. -- "state" (boolean flag if error occurred) -- "message" (string with error message) + These variables are automatically connected to the control systen in this format - /Devices/{AliasName}/message - /Devices/{AliasName}/status -Add a thread safe function reportException(). +Add a thread safe function ChimeraTK::DeviceModule::reportException(). A user/application can report an exception by calling reportException of DeviceModule with an exception string. The reportException packs the exception in a queue and the blocks the thread. This queue is processed by an internal function handleException which updates the DeviceError variables (status=1 and message="YourExceptionString") and tries to open the device. 
Once device can be opened the DeviceError variables are updated (status=0 and message="") and blocking threads are notified to continue. It must be noted that whatever operation which lead to exception e.g., read or write, should be repeated after the exception is handled. -Implmentation. -- DeviceModule +Implementation. +- ChimeraTK::DeviceModule --<b>4. Catch ChimeraTK::runtime_error exceptions.</b> +<b>5. Catch ChimeraTK::runtime_error exceptions.</b> Description. -Catch all the ChimeraTK::runtime_error exceptions that could be thrown in read and write operations and feed the error state into the DeviceModule through the function DeviceModule::reportException() . NDRegisterAccessors coming from device should be used as a singal central point to catch these excpetions. +For a device with it's deviceError.status = 0 (see 2.4.3), catch all the ChimeraTK::runtime_error exceptions that could be thrown in read and write operations and feed the error state into the DeviceModule through the function ChimeraTK::DeviceModule::reportException(). Retry the failed operation after reportException() returns. -Implmentation. +For a device that has been opened for the first time but has not reached 2.4.3 i.e., it's deviceError.status != 0, and it throws a ChimeraTK::runtime_error exception see 2.3. -It is done by placing a ExceptionHandlingDecorator around all NDRegisterAccessors coming from a device. -- NDRegisterAccessors -- Application +Implementation. +- Exceptions are caught as explained in 1.e and 1.f. +- ChimeraTK::NDRegisterAccessors +- ChimeraTK::Application --<b>5. Faulty device should not block any other device.</b> +<b>6. Faulty device should not block any other device.</b> Description. -Each TriggerFanOut deals with several variable networks at the same time, which are triggered by the same trigger. Each variable network has its own feeder and one or more consumers. You do not need to change anything about the variable networks. 
-On the other hand, the trigger itself is a variable network, too. The TriggerFanOut has a consumer of this trigger network. This is the accessor on which the blocking read() is called in the loop. You will need to create additional consumers in the trigger network, one for each TriggerFanOut. - +Each ChimeraTK::TriggerFanOut deals with several variable networks at the same time, which are triggered by the same trigger. Each variable network has its own feeder and one or more consumers. The trigger itself is a variable network, too. One consumer per ChimeraTK::TriggerFanOut is required. Implementation. -- Application (Application::typedMakeConnection) +- ChimeraTK::Application::typedMakeConnection() --<b>6. The server must always start even if a device is in error state.</b> +<b>7. The server must always start even if a device is in error state.</b> Description. -To make sure that the server should always start, the initial opening of the device should take place in the DeviceModule itself, inside the exception handling loop so that device can go to the error state right at the beginning and the server can start despite not all its devices are available. +To make sure that the server should always start, the initial opening of the device should take place in the ChimeraTK::DeviceModule::handleException(), which has the exception handling loop so that device can go to the error state right at the beginning and the server can start despite not all its devices are available. -Implementation. +Does not fit here, but is the only place where handleException is mentioned: +- handleException() must not block. -- DeviceModule ( DeviceModule::handleException() ). +Implementation. +- ChimeraTK::DeviceModule::handleException() --<b>7. Set/clear fault flag of module in case of exception.</b> -Background. 
- -A DataValidity flag of a module is set to faulty if any input variables returns with a set data fault flag after a read operation and is cleared once all inputs have data fault no longer set. In a write operation, the module's data fault flag status is attached to the variable to write. -More detail ...(Martin‘s doc) +<b>8. Propagate error flag</b> Description. -In case of an ChimeraTK::runtime_error exception this DataValidity flag should also be set to faulty and propogated to all outputs of the module. When the operation completes after clearing the exception state, the flag should be cleared as well. - -Implmentation. -- ExceptionHandlingDecorator -- TriggerFanOut +See 2.5.1. -Additional note from code author. -Note that if the data is distributed through a triggered FanOut (i.e. variables from device is connected to other variables through a trigger, the usual way for poll-type variables) the data read from the receiving end of the variable cannot be considered valid if the DataValidity is faulty. -Additionaly, a change of to a faulty validity state will signal the availability of new data on those variables, which is to be considered invalid. +For initial error propagation see <a href='spec_initialValuePropagation.html'>spec_initialValuePropagation</a>. +Implementation. +- ChimeraTK::ExceptionHandlingDecorator +- ChimeraTK::TriggerFanOut -Bahnhof.Variables which are Constants or outputs of the ConfigReader and are connected to a DeviceModule should be written in an initialisation handler. Currently they are written in ConfigReader::pepare() etc., which might block the application initialisation if an exception occurs in the process of writing these variables. --<b>8. Initialise the device after recovey.</b> +<b>9. Initialise the device</b> Description. -If a device is recovered after an exception, it might need to be reinitialised (e.g. because it was power cycled). The device should be automatically reinitialised after recovery. 
+The device should be automatically initialised when opened for first time (2.4.1) and automatically re-initialised after recovery (2.5.3.4). Implementation. A list of DeviceModule std::function is added. InitialisationHandlers can be added through construtor and addInitialisationHandler() function. When the device recovers all the initialisationHandlers in the list are executed. -- DeviceModule +- ChimeraTK::DeviceModule +- ChimeraTK::ExceptionHandlingDecorator --<b>9. Recover process variables after exception.</b> +<b>10. Recover process variables after exception.</b> Background. After a device has failed and recovered, it might have re-booted and lost the values of the process variables that live in the server and are written to the device. Hence these values have to be re-written after the device has recovered. Description. -Technically the issue is that the original value that has been written is not safely accessible when recovering. Inside the accessor the user buffer must not be touched because the recovery is taking place in a different thread. In addition we don't know where the data is (might or might not have been swapped away, depending whether write() or writeDestructively() has been call by the user). -The only race condition free way is to create a copy when writing the data to the device, so they are available when recovering. + +Create a copy of accessor when writing the data to the device and use this to recover the values when the device is available again. Recovery accessor do not write if the register is never written before (2.5.3.5.). Implementation. -- DeviceModule -- ExceptionHandlingDecorator -A list of TransferElements shared pointers is created with as writeRecoveryOpen which is populated in function addRecoveryAccessor in the DeviceModule. -ExceptionHandlingDecorator is extended by adding second accessor to the same register as the target accessor it is decorating and data is copied in doPreWrite(). 
+- ChimeraTK::DeviceModule +- ChimeraTK::ExceptionHandlingDecorator +- A list of ChimeraTK::TransferElements is created as ChimeraTK::DeviceModule::writeRecoveryOpen which is populated in function ChimeraTK::DeviceModule::addRecoveryAccessor(). +ChimeraTK::ExceptionHandlingDecorator is extended by adding a second accessor to the same register as the target accessor it is decorating. +<I> Data is copied in doPreWrite(). [TBD: Do we want this behaviour? => Yes, it has to happen before the original accessor's pre-write because this is the last occasion where the data is still guaranteed to be in our user buffer. The accessor's pre-write might swap the data out, and it might never be available again (in case of write destructively).]</I> +- As the user buffer recovery accessor is written in an ApplicationModule or fanout thread, but read in the DeviceModule thread when recovering, it has to be protected by a mutex. For efficiency one single shared mutex is used. All ExceptionHandlingDecorators will acquire a shared lock, as each decorator only touches its own buffer. The DeviceModule, which writes all recovery accessors, uses the unique lock to prevent any ExceptionHandlingDecorator to modify the user buffer while doing so. + +<b> ExceptionHandlingDecorator </b> + +- Device accessors must only throw in postRead and postWrite (FIXME: move text from initial value propagation spec) +- The Decorator only decorates postRead / postWrite (FIXME: conceptually, which one is the correct one?) +- The decorator provides a writeWithoutErrorBlocking() function so that even in case of exception write should return. [TBD: name of the function] + +Like this the decoration also works for transfer groups and asynchronous transfers. + +<b>5. Known Bugs.</b> + +- Step 2.1 The initial value of deviceError is not set to 1. + +- Step 2.2. is not correctly fulfilled as we are only waiting for device to be opened and don't wait for it to be correctly initialised. + +- Step 2.4.3. 
is currently being set before initialisationHandlers and writeAfterOpen. + +- Step 2.5.3.7. is currently being set before initialisationHandlers and writeRecoveryOpen. +- Check the comment in Device.h about writeAfterOpen(). 'This is used to write constant feeders to the device.' +- Check the documentation of DataValidity. ...'Note that if the data is distributed through a triggered FanOut....' diff --git a/doc/main.dox b/doc/main.dox index 4643859a56a4afde4d7682151187e27c001e9e12..678ac61950690edee413653062d62a4a18c372f0 100644 --- a/doc/main.dox +++ b/doc/main.dox @@ -3,7 +3,6 @@ API documentation: - \subpage exceptionHandling -- \subpage exceptionHandlingDesign Module documentation: - \subpage loggingdoc @@ -19,5 +18,6 @@ Examples: Technical specifications: - \subpage spec_initialValuePropagation +- \subpage exceptionHandlingDesign */ diff --git a/doc/spec_dataValidityPropagation.md b/doc/spec_dataValidityPropagation.md new file mode 100644 index 0000000000000000000000000000000000000000..4660a4f4ad6920a28260476d885da8fec361b036 --- /dev/null +++ b/doc/spec_dataValidityPropagation.md @@ -0,0 +1,8 @@ +Technical specification: data validity propagation {#spec_dataValidityPropagation} +====================================================== + +Brainstorming for the spec +* Explain general idea +* Explain DataValidityProparationExecutor and MetaDataPropagatingRegisterDecorator +* Explain use case ApplicationModule and TriggerFanOut +* Explain interaction with exception handling diff --git a/doc/spec_initialValuePropagation.md b/doc/spec_initialValuePropagation.md index e06277e549c124a5c2891a156c21f31cd18b3c56..6f349fd4870fe364bc3692eb5e04059a1ebaef53 100644 --- a/doc/spec_initialValuePropagation.md +++ b/doc/spec_initialValuePropagation.md @@ -5,98 +5,220 @@ Technical specification: propagation of initial values {#spec_initialValuePropag ## Introduction ## -This document describes how initial values are propagated from the control system persistency layer, from the 
devices and (if applicable) from application modules into the attached components (control system, devices and other application modules). +This document describes how initial values are propagated from the control system persistency layer, from the devices and from application modules into the attached components (control system, devices and other application modules). This specification goes beyond ApplicationCore. It has impact on other ChimeraTK libraries like DeviceAccess, the ControlSystemAdapter and even backends and adapter implementations. ## Definitions ## -- Initial value: The first valid value of a process variable after application start. This is a logical concept. It is to be distinguished from the (hardcoded) "first value" of the `ChimeraTK::ProcessArray` or any other `ChimeraTK::NDRegisterAccessor` implementation. +- Initial value: The start value of a process variable. The value is available to the receiving end of the process variable at a well defined point in time at the start. This is a logical concept. It is to be distinguished from the (often hardcoded) "value after construction" of the `ChimeraTK::ProcessArray` or any other `ChimeraTK::NDRegisterAccessor` implementation. The point in time when the value becomes available is well-defined, as described in the high-level requirements. ## High-level requirements ## -- Initial values must be available to all `ApplicationModule`s at the start of the `mainLoop()`. No call to `read()` etc. is required. This implies that the `mainLoop()` is not started until all initial values are available, including those coming from devices which might potentially be offline. -- Devices must receive the initial values as soon as possible after the device is opened and after the initialisation sequence is executed, but before anything else gets written to the device. -- The control system must receivce the initial values as soon as they are available. 
The initial value is merely the first value the control system receives for a particular process variable - other variables might have received an update already multiple times before the initial value is recieved. -- Control system variables show the `DataValidity::faulty` flag until they have received the initial value. +- Initial values must be available to all `ChimeraTK::ApplicationModule`s at the start of the `ChimeraTK::ApplicationModule::mainLoop()`. No call to `ChimeraTK::TransferElement::read()` etc. is required. This implies that the `ChimeraTK::ApplicationModule::mainLoop()` is not started until all initial values are available, including those coming from devices which might potentially be offline, or from other `ApplicationModule`s. +- `ChimeraTK::ApplicationModule` implementations can either provide initial values for their outputs in `ChimeraTK::ApplicationModule::prepare()` (if the initial value doesn't depend on any inputs) or right after the start of the `ChimeraTK::ApplicationModule::mainLoop()` (if the initial value needs to be computed from the incoming initial values of the inputs). +- The "value after construction" must not be propagated automatically during initial value propagation, not even with the `ChimeraTK::DataValidity::faulty` flag set. It must not be visible to user code in the `ChimeraTK::ApplicationModule::mainLoop()`. +- Since `ChimeraTK::ApplicationModule`s might wait for initial values from other `ChimeraTK::ApplicationModule`s, the modules might end up in a dead lock due to a circular connection. The circular connection is legal, but the dead lock situation needs to be broken by one `ChimeraTK::ApplicationModule` writing its initial value during `ChimeraTK::ApplicationModule::prepare()`. +- Devices must receive the initial values as soon as possible after the device is opened and after the initialisation sequence is executed. 
There is no guarantee that other registers of the same device are written or read only after the initial values are written. Hence, any critical registers that need to be written before accessing other registers must be written in the initialisation sequence. +- The control system doesn't receive "initial values" as such. The first value of a process variable is sent to the control system when available. This may depend even on external conditions like the availability of devices, e.g. the control system interface has to run even if devices are not available and hence cannot send an initial value. +- Control system variables show the `ChimeraTK::DataValidity::faulty` flag until they have received the first valid value. +- For push-type variables from devices, the initial value is the current value polled at the application start. Since the variable might not get pushed regularly, the application must not wait for a value to get pushed. ## Detailed requirements ## -1. All `ChimeraTK::NDRegisterAccessor` implementations (including but not limited to the `ChimeraTK::ProcessArray`) must have the `DataValidity::faulty` flag set after construction for the receiving end. This ensures, all data is marked as `faulty` as long as no sensible initial values have been propagated. The sending end must have `DataValidity::ok`, so that the first written data automatically propagates the ok state. [TBD: What about bidirectional variables?] -2. All `ChimeraTK::NDRegisterAccessor` implementations must have initially a `ChimeraTK::VersionNumber` constructed with a `nullptr`, which allows to check whether this variable is still at its "first value" or the initial value propagation already took place. -3. All `ApplicationModules` and similar entities (like `ThreadedFanOut` and `TriggerFanOut`), that store a `DataValidity` directly or indirectly e.g. in form af a counter, must have their internal `DataValidity` flag set to `ok` after construction. -4. 
The initial `DataValidity::faulty` flags must not be propagated actively. The first propagated data must be always `ok` and must have a valid value. +1. All `ChimeraTK::NDRegisterAccessor` implementations (including but not limited to the `ChimeraTK::ProcessArray`) must have the `ChimeraTK::DataValidity::faulty` flag set after construction for the receiving end. This ensures, all data is marked as `faulty` as long as no sensible initial values have been propagated. The sending end must have `ChimeraTK::DataValidity::ok` after construction, so that the first written data automatically propagates the ok state by default. For bidirectional variables, this must be the case for both directions separately. +2. All `ChimeraTK::NDRegisterAccessor` implementations must have initially a `ChimeraTK::VersionNumber` constructed with a `nullptr`, which allows to check whether this variable is still at its "value after construction", or the initial value propagation already took place. +3. `ChimeraTK::ApplicationModule` (and `ChimeraTK::ThreadedFanOut`/`ChimeraTK::TriggerFanOut`) propagate the `ChimeraTK::DataValidity` of its outputs according to the state of all inputs. This behaviour is identical to later during normal data processing. +4. (removed) 5. Control system variables: - 1. Variables with the control-system-to-application direction must be written exactly once at application start by the control system adapter with their initial values from the persistency layer and the `DataValidity::ok`. This must be done before `ApplicationBase::run()` is called. [TBD: Is this last sentence a necessary restiction?] - 2. Initial values of variables with the application-to-control-system direction are written at an undefined time after the `ApplicationBase::run()` has been called. The control system adapter must not expect any specific behaviour. Entities writing to these variables do not need to take any special precautions, they do not even need to obey the second sentence in 4. 
In other words: application-to-control-system variables do not have an "initial value" in this particular meaning. + 1. Variables with the control-system-to-application direction must be written exactly once at application start by the control system adapter with their initial values from the persistency layer. This must be done before `ChimeraTK::ApplicationBase::run()` is called, or soon after (major parts of the application will be blocked until it's done). If the persistency layer can persist the `ChimeraTK::DataValidity`, the initial value should have the correct validity. Otherwise, initial values will always have the `ChimeraTK::DataValidity::ok`. + 2. Variables with the application-to-control-system direction do not have an "initial value". The first value of these variables are written at an undefined time after the `ChimeraTK::ApplicationBase::run()` has been called. The control system adapter must not expect any specific behaviour. Entities writing to these variables do not need to take any special precautions. 6. Device variables: - 1. Write accessors need to be written right after the device is opened and the initialisation is done. - 2. Read accessors need to be read after 6.a. [TBD: Is this ordering even possible? It is more like a 'nice to have' and not strictly required.] -7. Outputs of `ApplicationModule`s: - 1. By default, no initial values are propagated. - 2. Initial values can be written in `ApplicationModule::prepare()`. This fact is recorded in the variable model (`VariableNetworkNode`), see 8.b.v - 3. Since in `ApplicationModule::prepare()` all devices are still closed, any writes to device variables at this point need to be delayed until the device is open. The actual write is hence performed by the DeviceModule. -8. Inputs of `ApplicationModule`s: - 1. Initial values are read before start of `mainLoop()`. - 2. 
Since not all variables have initial values (see 7.a), the variable model (`VariableNetworkNode`) needs to be checked whether an initial value is present and how it needs to be read. This dependes on the data source type (i.e. the type of the feeder of the VariableNetwork): - 1. control system variable: blocking read - 2. device register without trigger: non-blocking read (even if register is push-type) - 3. device register with trigger (incl. TriggerType::pollingConsumer): blocking read - 4. constant: non-blocking read - 5. application: blocking read only if initial value was provided (see 7.a), otherwise no read -9. `ThreadedFanOut` and `TriggerFanOut` etc. - 1. Inputs need to behave like described in 8.b - 2. Outputs connected to devices need to obey 6.a - 3. Outputs connected to `ApplicationModule`s will pass on the initial value, as the `ApplicationModule` will obey 8.b just like the FanOut input. -10. Constants: - 1. Values are propagated before the `ApplicationModule` threads are starting. - 2. Special treatment for constants written to devices: They need to be written after the device is opened, see 6.a + 1. Write accessors need to be written after the device is opened and the initialisation is done, as soon as the initial value is available for that variable. Initial values can be present through 5.a, 6.b or 7. + 2. Initial values for read accessors must be read after the device is opened and the initialisation is done. The read is performed with `ChimeraTK::TransferElement::readLatest()`. +7. Outputs of `ChimeraTK::ApplicationModule`s: + 1. Initial values can be written in `ChimeraTK::ApplicationModule::prepare()`, if the value does not depend on any input values (since input values are not available during `prepare()`). + 2. Alternatively, initial values can be written in `ChimeraTK::ApplicationModule::mainLoop()` before calling any `read` function. 
Typically, to propagate the initial values of its inputs, an `ApplicationModule` will run its computations and write its outputs first before waiting for new data with a blocking `read()` at the end of the processing loop. The application author needs to take into account that in this case `write` functions might block until the target device becomes available and hence block the further propagation of the initial values. + 3. Since in `ChimeraTK::ApplicationModule::prepare()` all devices are still closed, any writes to device variables at this point need to be delayed until the device is open. The actual write is hence performed asynchronously in a different thread. +8. Inputs of `ChimeraTK::ApplicationModule`s: + 1. Initial values are read before the start of `ChimeraTK::ApplicationModule::mainLoop()` (but already in the same thread which later executes the `mainLoop()`). + 2. It depends on the data source type (i.e. the type of the feeder of the VariableNetwork) whether a blocking `read()` or non-blocking `readLatest()` needs to be used for the initial value: + 1. *control system variable*: `ChimeraTK::TransferElement::read()` + 2. *device register without trigger*: `ChimeraTK::TransferElement::readLatest()` (even if register is push-type). Special treatment required to block until the device is accessible. + 3. *poll-type device register with trigger (incl. `ChimeraTK::VariableNetwork::TriggerType::pollingConsumer`)*: `ChimeraTK::TransferElement::read()` + 4. *constant*: `ChimeraTK::TransferElement::readLatest()` + 5. *application*: `ChimeraTK::TransferElement::read()` +9. The module-like fan outs `ChimeraTK::ThreadedFanOut` and `ChimeraTK::TriggerFanOut` (does not apply to the accessor-like fan outs `ChimeraTK::FeedingFanOut` and `ChimeraTK::ConsumingFanOut`) + 1. The fan outs should have a transparent behaviour, i.e. an entity that receives an initial value through a fan out should see the same behaviour as if a direct connection had been realised. 
+ 2. This implies that the inputs need to be treated like described in 8.b. + 3. The initial value is propagated immediately to the outputs. + 4. If an output cannot be written at that point (because it writes to a device currently being unavailable), the value propagation to other targets must not be blocked. See recovery mechanism described in @ref exceptionHandlingDesign. +10. Constants (`ChimeraTK::Application::makeConstant()`): + 1. Values are propagated before the `ChimeraTK::ApplicationModule` threads are starting (just like initial values written in `ChimeraTK::ApplicationModule::prepare()`). + 2. Special treatment for constants written to devices: They need to be written after the device is opened (see 6.a), with the same mechanism as in 7.c. +11. Variables with a return channel ("bidirectional variables", `ChimeraTK::ScalarPushInputWB`, `ChimeraTK::ScalarOutputPushRB` and the array variants) behave like their unidirectional pendants, i.e. the existence of the return channel is ignored during the initial value propagation. ### Comments ### -- To 3.: It looks like a conflict with 1., but it is not. Due to 1., all variables will already present itself to the outside as `faulty`. 3. has an impact on the DataValidity of variables written within the module. If a module decides to write a variable even before any inputs are checked, it should be assumed that the written values are valid. Hence the internal validity must start at `ok`. -- To 4.: It is very important that no wrong data is transported initially. Since the "first value" of all process variables is always 0, this value is basically always wrong. If it gets propagated within the application, modules will process this value (usually even if `DataValidity::faulty` is set), despite the value might present an inconsistent state with other process variables. If it gets propagated to the control system, other applications might act on an again inconsistent state. 
+- It is very important that no wrong data is transported initially. Since the "value after construction" of all process variables is always 0 or so, this value is basically always wrong. If it gets propagated within the application, modules will process this value (usually even if `ChimeraTK::DataValidity::faulty` is set), even though the value might present an inconsistent state with other process variables. If it gets propagated to the control system, other applications might act on an again inconsistent state. - To 5.: This is the responsibility of each control system adpater implementation. -- To 5.a: It is important that the initial values are written before `ApplicationBase::run()` to avoid race conditions if `readLatest()` might be used for the initial values (e.g. in ThreadedFanOuts). This can also be solved differently, if in all these places the same logic as in 8. is applied. +- To 7. and 10.: An output of a `ChimeraTK::ApplicationModule` with an initial value written in `ChimeraTK::ApplicationModule::prepare()` and later never written again behaves in the same way as a constant. +- To 7.b.: In future, the specification could be changed to mitigate the issue of blocking `write`s: It could be specified that all `write`s in the `ChimeraTK::ApplicationModule::mainLoop()` are not blocking due to unavailable devices until the first `read` function has been called. +- To 8.b.: The decision whether to use blocking or non-blocking read for the initial transfer has the following reasons: + - 8.b.i.: Blocking reads prevent race conditions especially in cases where a ThreadedFanOut is involved. + - 8.b.ii.: `ChimeraTK::TransferElement::readLatest()` fetches the current value instead of waiting for a new value - see high-level requirements. + - 8.b.iii.: Blocking reads prevent race conditions. + - 8.b.iv.: Blocking reads on constants never return, hence the non-blocking read. 
+ - 8.b.v.: Blocking read required in case the sender writes the initial value during `ChimeraTK::ApplicationModule::mainLoop()`. + ## Implementation ## ### NDRegisterAccessor implementations ### -- 1. must currently be implemented by each NDRegisterAccessor separately. [TBD: Instead of requiring all implementations to be changed, we could also fix this in `Application::createDeviceVariable()`, but this creates an asymetry to the `ProcessArray`...] -- 2. must currently be implemented by each NDRegisterAccessor separately. All accessors should already have a VersionNumber data member called `currentVersion` or similar, it simply needs to be constructed with a `nullptr` as an argument. -- The `UnidirectionalProcessArray` uses always a default start value of `DataValidity::ok`, but overwrites this with `DataValidity::faulty` for the receivers in the factory function `createSynchronizedProcessArray()` (both implementations, see UnidirectionalProcessArray.h). +- Each `ChimeraTK::NDRegisterAccessor` must implement 1. separately. +- Each `ChimeraTK::NDRegisterAccessor` must implement 2. separately. All accessors should already have a `ChimeraTK::VersionNumber` data member called `currentVersion` or similar, it simply needs to be constructed with a `nullptr` as an argument. +- `ChimeraTK::NDRegisterAccessor` must throw exceptions *only* in `TransferElement::postRead()` and `TransferElement::postWrite()`. No exceptions may be thrown in `TransferElement::doReadTransfer()` etc. (all transfer implementations). See also @ref exceptionHandlingDesign. ### ApplicationModule ### -- Needs to implement 3. -- `getDataValidity()` returns `ok` if the `faultCounter` is 0, `faulty` otherwise -- Hence fault counter starts with 0. +- Implement 3.: + - `ChimeraTK::ApplicationModule::getDataValidity()` returns `ok` if the `faultCounter` is 0, otherwise `faulty`. + - All input variables are `faulty` at start. + - Hence fault counter should start at the number of inputs. 
+ - The `ChimeraTK::MetaDataPropagatingRegisterDecorator` will count down the fault counter when an `ok` value is received + - Hence `ChimeraTK::ApplicationModule::getDataValidity()` will return `ok` when all inputs have received an `ok` value. +- API documentation must contain 7. +- Implements 8. (hence takes part in 5.a, 6.b, 7 and 10 implicitly): + - All inputs of the module must be read in the `ChimeraTK::ApplicationModule::mainLoopWrapper()` before the call to `mainLoop()`. + - The type of the read is decided via `ChimeraTK::VariableNetworkNode::initialValueType()`, which implements 8.b. ### ThreadedFanOut ### -- Needs to implement 3. -- Currently just passing on the validity from the input. -- This is probably going to change when the correct propagation of the validity flag is implemented. +- Implement 3, implementation will be already covered by normal flag propagation +- Needs to implement 9. (hence takes part in 5.a, 6, 7 and 10 implicitly): + - structure the fan out's "mainLoop"-equivalent (`ThreadedFanOut::run()`) like this: + - read initial values (9.a via `ChimeraTK::VariableNetworkNode::initialValueType()`) + - begin loop + - write outputs + - read input + - cycle loop + +### TriggerFanOut ### + +- Implement 3, implementation will be already covered by normal flag propagation +- Needs to implement 9. (hence takes part in 5.a, 6, 7 and 10 implicitly): + - In contrast to the `ThreadedFanOut`, the `TriggerFanOut` has only poll-type data inputs which are all coming from the same device. Data inputs cannot come from non-devices. + - Structure the fan out's "mainLoop"-equivalent (`TriggerFanOut::run()`) like this: + - read initial values of trigger input (9.a via `ChimeraTK::VariableNetworkNode::initialValueType()`) + - begin loop + - read inputs via `ChimeraTK::TransferGroup` + - write outputs + - cycle loop + - 9.d is covered by the `ChimeraTK::ExceptionHandlingDecorator`. 
It is important that `ChimeraTK::NDRegisterAccessor` throws only in `TransferElement::postRead()` (see implementation section for the `NDRegisterAccessor`), since otherwise the decorator cannot catch the exceptions due to the `TransferGroup`. + +### DeviceModule ### + +All points are also covered by @ref exceptionHandlingDesign. + +- Takes part in 6.a: + - `ChimeraTK::DeviceModule::writeRecoveryOpen` [tbd: new name for the list] is a list of accessors to be written after the device is opened/recovered. +- Takes part in 6.b: + - Initial values are being read from the device by other entities, but these must be blocked until the DeviceModule wakes them up after the device has been opened and initialised. This uses the same mechanism as for blocking read operations during recovery. +- Needs to implement 10.b, done through `ChimeraTK::DeviceModule::writeRecoveryOpen` -> already covered by first point. -### ThreadedFanOut ### +### ExceptionHandlingDecorator ### + +- Must implement part of 6.a/7.c/9.d/10.b: Provide function which allows to write without blocking in case of an unavailable device: + - The list `ChimeraTK::DeviceModule::writeRecoveryOpen` [tbd: new name for the list] is filled with the "recovery accessor" (a "copy" of the original accessor, created by `ChimeraTK::Application::createDeviceVariable()`). This accessor allows the restoration of the last known value of a register after recovery from an exception by the DeviceModule. See also @ref exceptionHandlingDesign. + - When a write happens while the device is still unavailable (not opened or initialisation still in progress), the write should not block (in contrast to normal writes in a `ChimeraTK::ApplicationModule::mainLoop()`). + - The "recovery accessor" is also used in this case to defer the write operation until the device becomes available. + - The actual write is then performed asynchronously by the `ChimeraTK::DeviceModule`. 
+ - This implementation is the same as the implementation for the non-blocking write function [tbd: correct name] which is available to `ChimeraTK::ApplicationModule` user implementations. + +- Needs to implement 6.b.: + - The `ChimeraTK::TransferElement::readLatest()` must be delayed until the device is available and initialised. + - @ref exceptionHandlingDesign states that non-blocking read operations like `ChimeraTK::TransferElement::readLatest()` should never block due to an exception. + - Hence a special treatment is required in this case: + - `ChimeraTK::ExceptionHandlingDecorator::readLatest()` should block until the device is opened and initialised if (and only if) the accessor still has the `ChimeraTK::VersionNumber(nullptr)` - which means it has not yet been read. + +### VariableNetworkNode ### + +- Implements the decision tree mentioned in 8.b. in `ChimeraTK::VariableNetworkNode::initialValueType()` -- Needs to implement 3. +### Application ### +(This section refers to the class `ChimeraTK::Application`, not to the user-defined application.) +- Implements 10.a. 10.b covered by `ChimeraTK::ExceptionHandlingDecorator`. +### ControlSystemAdapter ### + +- Must implement 5.a + - Needs to be done in all adapters separately + +### Non-member functions in ApplicationCore ### + +- Convenience function to call the non-blocking write function of the `ChimeraTK::ExceptionHandlingDecorator`, if the accessor is such a decorator. Otherwise the normal write function is called, since it will never block due to exception handling anyway. ## Known bugs ## +### DeviceAccess interface ### + +- 1. is currently not implementable for (potentially) bidirectional accessors (like most backend accessors). An interface change is required to allow separate `ChimeraTK::DataValidity` flags for each direction. + ### NDRegisterAccessor implementations ### - 1. is not implemented for Device implementations (only the `UnidirectionalProcessArray` is correct at the moment). - 2. 
is not implemented for Device implementations (only the `UnidirectionalProcessArray` is correct at the moment). +- Exceptions are currently thrown in the wrong place (see implementation section for the NDRegisterAccessor). A possible implementation to help backends comply with this rule would be: + - Introduce non-virtual `TransferElement::readTransfer()` etc, i.e. all functions like `do[...]Transfer[...]()` should have non-virtual counterparts without `do`. + - These new functions will call the actual `do[...]Transfer[...]()` function, but place a try-catch-block around to catch all ChimeraTK exceptions + - The exceptions are stored and operation is continued. In case of boolean return values (`doReadTransferNonBlocking()`, `doReadTransferLatest()` and `doWriteTransfer()`), the value for success must be returned (`true` for read and `false` for write), to make sure the corresponding post-action is executed. + - With `TransferElement::postRead()` resp. `TransferElement::postWrite()` non-virtual wrappers for the post-actions already exist. In these functions, the stored exception should be thrown. + - All decorators and decorator-like accessors must be changed to call always the (new or existing) non-virtual functions in their virtual `do[...]` functions. This applies to both the transfer functions and the pre/post actions (for the latter it should be already the case). + - It is advisable to add an assert that no unthrown exception is present before storing a new exception, to prevent exceptions from getting lost due to errors in the business logic. + +### ApplicationModule / EntityOwner ### + +- 3. is not properly implemented, `faultCounter` starts at 0. The correct implementation should increase the counter in `EntityOwner::registerAccessor()` and decrease it in `EntityOwner::unregisterAccessor()` (sorry for the confusing function names...). + +### TriggerFanOut ### + +- 3. is not correctly implemented, it needs to be done on a per-variable level. 
+- It currently implements its own exception handling (including the `Device::isOpened()` check), but in a wrong way. After the `NDRegisterAccessor` has been fixed, this needs to be removed. + +### DeviceModule ### + +Probably all points are duplicates with @ref exceptionHandlingDesign. + +- Merge `ChimeraTK::DeviceModule::writeAfterOpen/writeRecoveryOpen` lists. +- Implement mechanism to block read/write operations in other threads until after the initialisation is done. ### ExceptionHandlingDecorator ### +Some points are duplicates with @ref exceptionHandlingDesign. + - It waits until the device is opened, but not until after the initialisation is done. +- Provide non-blocking function. +- Implement special treatment for first `readLatest()` operation to always block in the very first call until the device is available. + - Since `readLatest()` is always the first `read`-type function called, it is acceptable if all `read`-type functions implement this behaviour. Choose whatever is easier to implement, update the implementation section of this specification to match the chosen implementation. + +### VariableNetworkNode ### + +- Rename `ChimeraTK::VariableNetworkNode::hasInitialValue()` into `ChimeraTK::VariableNetworkNode::initialValueType()` +- Remove data member storing the presence of an initial value, this is now always the case. Also change `ChimeraTK::VariableNetworkNode::initialValueType()` accordingly. + +### ControlSystemAdapter ### + +- EPICS-Adapter might not properly implement 5.a, needs to be checked. Especially it might not be guaranteed that all variables are written (and not only those registered in the autosave plugin). +- The `ChimeraTK::UnidirectionalProcessArray` uses always a default start value of `DataValidity::ok`, but overwrites this with `DataValidity::faulty` for the receivers in the factory function `ChimeraTK::UnidirectionalProcessArray::createSynchronizedProcessArray()` (both implementations, see UnidirectionalProcessArray.h). 
This can be solved more elegantly after the DeviceAccess interface change described above. + +### Non-member functions ### + +- Implement the missing convenience function ### Documentation ### -- Documentation of ControlSystemAdapter should mention that implementations must take care about 5. \ No newline at end of file +- Documentation of ControlSystemAdapter should mention that implementations must take care about 5. +- Documentation for ApplicationModule should mention 7. \ No newline at end of file diff --git a/include/ConstantAccessor.h b/include/ConstantAccessor.h index 3eb285eb5b008eee32faf5c84af24e8ff5c4c3f7..c0391da9855d9d10e2a3d5006baf2720c97d1ff7 100644 --- a/include/ConstantAccessor.h +++ b/include/ConstantAccessor.h @@ -37,7 +37,6 @@ namespace ChimeraTK { return; } // block forever - isInterrupted = false; promise.get_future().wait(); // if we get here, interrupt() has been called throw boost::thread_interrupted(); diff --git a/include/DebugPrintAccessorDecorator.h b/include/DebugPrintAccessorDecorator.h index 6088e0560f955c8a464404a1e127893c9c4a0620..44e597915afa6722fe59c0476d8ea46ad10d1dfb 100644 --- a/include/DebugPrintAccessorDecorator.h +++ b/include/DebugPrintAccessorDecorator.h @@ -52,7 +52,7 @@ namespace ChimeraTK { TransferFuture doReadTransferAsync() override { std::cout << "doReadTransferAsync() called on '" << _fullyQualifiedName << std::endl; - return ChimeraTK::NDRegisterAccessorDecorator<UserType>::readAsync(); + return ChimeraTK::NDRegisterAccessorDecorator<UserType>::doReadTransferAsync(); } void doPreRead() override { diff --git a/src/ModuleImpl.cc b/src/ModuleImpl.cc index 6aa629c74eb6ddb07d7c60a0066ecdd3dd368f2d..88e91bf0ed8d863d97c2db1c094b1166952d95c0 100644 --- a/src/ModuleImpl.cc +++ b/src/ModuleImpl.cc @@ -39,8 +39,13 @@ namespace ChimeraTK { instance = dynamic_cast<ConfigReader*>(mod); } if(nConfigReaders != 1) { - throw ChimeraTK::logic_error("ApplicationModule::appConfig() called but " + std::to_string(nConfigReaders) + - "
instances of ChimeraTK::ConfigReader have been found."); + std::string message = "ApplicationModule::appConfig() called but " + std::to_string(nConfigReaders) + + " instances of ChimeraTK::ConfigReader have been found."; + // Printing the message as well; there is a situation when running under Boost::Test where this + // is caught by Boost and causes a weird destructor message from AppBase.cc instead with no means of + // finding out the actual error + std::cerr << message << std::endl; + throw ChimeraTK::logic_error(message); } return *instance; } diff --git a/tests/executables_src/testExceptionHandling.cc b/tests/executables_src/testExceptionHandling.cc index cac0e665f8bc46592cd471629429e40f6aafb204..8c0a7ae2e10a90eac1fef673d04566a9b266a4a7 100644 --- a/tests/executables_src/testExceptionHandling.cc +++ b/tests/executables_src/testExceptionHandling.cc @@ -8,7 +8,6 @@ #include <ChimeraTK/BackendFactory.h> #include <ChimeraTK/Device.h> #include <ChimeraTK/NDRegisterAccessor.h> -#include <ChimeraTK/DummyRegisterAccessor.h> #include <ChimeraTK/ExceptionDummyBackend.h> #include "Application.h" @@ -161,8 +160,8 @@ BOOST_AUTO_TEST_CASE(testExceptionHandlingRead) { boost::shared_ptr<ctk::ExceptionDummy> dummyBackend2 = boost::dynamic_pointer_cast<ctk::ExceptionDummy>( ChimeraTK::BackendFactory::getInstance().createBackend(ExceptionDummyCDD2)); - ChimeraTK::DummyRegisterAccessor<int> readbackDummy1(dummyBackend1.get(), "MyModule", "readBack"); - ChimeraTK::DummyRegisterAccessor<int> readbackDummy2(dummyBackend2.get(), "MyModule", "readBack"); + ctk::Device dev1(ExceptionDummyCDD1); + ctk::Device dev2(ExceptionDummyCDD2); // Connect the whole devices into the control system, and use the control system variable /trigger as trigger for // both devices. 
The variable becomes a control system to application variable and writing to it through the test @@ -191,8 +190,8 @@ BOOST_AUTO_TEST_CASE(testExceptionHandlingRead) { readback1.read(); readback2.read(); - readbackDummy1 = 42; - readbackDummy2 = 52; + dev1.write<int>("MyModule/readBack.DUMMY_WRITEABLE", 42); + dev2.write<int>("MyModule/readBack.DUMMY_WRITEABLE", 52); // initially there should be no error set trigger.write(); @@ -208,8 +207,8 @@ BOOST_AUTO_TEST_CASE(testExceptionHandlingRead) { // repeat test a couple of times to make sure it works not only once for(size_t i = 0; i < 3; ++i) { // enable exception throwing in test device 1 - readbackDummy1 = 10 + i; - readbackDummy2 = 20 + i; + dev1.write<int>("MyModule/readBack.DUMMY_WRITEABLE", 10 + i); + dev2.write<int>("MyModule/readBack.DUMMY_WRITEABLE", 20 + i); dummyBackend1->throwExceptionRead = true; trigger.write(); CHECK_TIMEOUT(message1.readLatest(), 10000); @@ -226,7 +225,7 @@ BOOST_AUTO_TEST_CASE(testExceptionHandlingRead) { // even with device 1 failing the second one must process the data, so send a new trigger // before fixing dev1 - readbackDummy2 = 120 + i; + dev2.write<int>("MyModule/readBack.DUMMY_WRITEABLE", 120 + i); trigger.write(); BOOST_CHECK(!readback1.readNonBlocking()); // we should not have gotten any new data BOOST_CHECK(readback1.dataValidity() == ChimeraTK::DataValidity::faulty); // But the fault flag should still be set @@ -234,8 +233,8 @@ BOOST_AUTO_TEST_CASE(testExceptionHandlingRead) { BOOST_CHECK_EQUAL(readback2, 120 + i); // Now "cure" the device problem - readbackDummy1 = 30 + i; - readbackDummy2 = 40 + i; + dev1.write<int>("MyModule/readBack.DUMMY_WRITEABLE", 30 + i); + dev2.write<int>("MyModule/readBack.DUMMY_WRITEABLE", 40 + i); dummyBackend1->throwExceptionRead = false; trigger.write(); CHECK_TIMEOUT(message1.readLatest(), 10000); @@ -270,8 +269,8 @@ BOOST_AUTO_TEST_CASE(testExceptionHandlingWrite) { boost::shared_ptr<ctk::ExceptionDummy> dummyBackend2 = 
boost::dynamic_pointer_cast<ctk::ExceptionDummy>( ChimeraTK::BackendFactory::getInstance().createBackend(ExceptionDummyCDD2)); - ChimeraTK::DummyRegisterAccessor<int> actuatorDummy1(dummyBackend1.get(), "MyModule", "actuator"); - ChimeraTK::DummyRegisterAccessor<int> actuatorDummy2(dummyBackend2.get(), "MyModule", "actuator"); + ctk::Device dev1(ExceptionDummyCDD1); + ctk::Device dev2(ExceptionDummyCDD2); // Connect the whole devices into the control system, and use the control system variable /trigger as trigger for // both devices. The variable becomes a control system to application variable and writing to it through the test @@ -303,8 +302,8 @@ BOOST_AUTO_TEST_CASE(testExceptionHandlingWrite) { actuator2.write(); BOOST_CHECK(!message1.readLatest()); BOOST_CHECK(!status1.readLatest()); - CHECK_TIMEOUT(actuatorDummy1 == 29, 10000); - CHECK_TIMEOUT(actuatorDummy2 == 39, 10000); + CHECK_TIMEOUT(dev1.read<int>("MyModule/actuator") == 29, 10000); + CHECK_TIMEOUT(dev2.read<int>("MyModule/actuator") == 39, 10000); BOOST_CHECK(static_cast<std::string>(message1) == ""); BOOST_CHECK(status1 == 0); @@ -321,22 +320,22 @@ BOOST_AUTO_TEST_CASE(testExceptionHandlingWrite) { BOOST_CHECK(static_cast<std::string>(message1) != ""); BOOST_CHECK_EQUAL(status1, 1); usleep(10000); // 10ms wait time so potential wrong values could have propagated - BOOST_CHECK(actuatorDummy1 == int(30 + i - 1)); // write not done for broken device + BOOST_CHECK(dev1.read<int>("MyModule/actuator") == int(30 + i - 1)); // write not done for broken device // the second device must still be functional BOOST_CHECK(!message2.readNonBlocking()); BOOST_CHECK(!status2.readNonBlocking()); - CHECK_TIMEOUT(actuatorDummy2 == int(40 + i), 10000); // device 2 still works + CHECK_TIMEOUT(dev2.read<int>("MyModule/actuator") == int(40 + i), 10000); // device 2 still works // even with device 1 failing the second one must process the data, so send a new data before fixing dev1 actuator2 = 120 + i; actuator2.write(); - 
CHECK_TIMEOUT(actuatorDummy2 == int(120 + i), 10000); // device 2 still works + CHECK_TIMEOUT(dev2.read<int>("MyModule/actuator") == int(120 + i), 10000); // device 2 still works // Now "cure" the device problem dummyBackend1->throwExceptionWrite = false; CHECK_TIMEOUT(message1.readLatest(), 10000); CHECK_TIMEOUT(status1.readLatest(), 10000); - CHECK_TIMEOUT(actuatorDummy1 == int(30 + i), 10000); // write is now complete + CHECK_TIMEOUT(dev1.read<int>("MyModule/actuator") == int(30 + i), 10000); // write is now complete BOOST_CHECK_EQUAL(static_cast<std::string>(message1), ""); BOOST_CHECK_EQUAL(status1, 0); } @@ -351,8 +350,14 @@ BOOST_AUTO_TEST_CASE(testExceptionHandlingOpen) { boost::shared_ptr<ctk::ExceptionDummy> dummyBackend2 = boost::dynamic_pointer_cast<ctk::ExceptionDummy>( ChimeraTK::BackendFactory::getInstance().createBackend(ExceptionDummyCDD2)); - ChimeraTK::DummyRegisterAccessor<int> readbackDummy1(dummyBackend1.get(), "MyModule", "readBack"); - ChimeraTK::DummyRegisterAccessor<int> readbackDummy2(dummyBackend2.get(), "MyModule", "readBack"); + ctk::Device dev1(ExceptionDummyCDD1); + ctk::Device dev2(ExceptionDummyCDD2); + dev1.open(); + dev2.open(); + dev1.write<int>("MyModule/readBack.DUMMY_WRITEABLE", 100); + dev2.write<int>("MyModule/readBack.DUMMY_WRITEABLE", 110); + dev1.close(); + dev2.close(); // Connect the whole devices into the control system, and use the control system variable /trigger as trigger for // both devices. 
The variable becomes a control system to application variable and writing to it through the test @@ -378,8 +383,6 @@ BOOST_AUTO_TEST_CASE(testExceptionHandlingOpen) { auto trigger = test.getScalar<int>("trigger"); - readbackDummy1 = 100; - readbackDummy2 = 110; trigger.write(); //device 1 is in Error state CHECK_TIMEOUT(message1.readLatest(), 10000); @@ -395,7 +398,7 @@ BOOST_AUTO_TEST_CASE(testExceptionHandlingOpen) { // even with device 1 failing the second one must process the data, so send a new trigger // before fixing dev1 - readbackDummy2 = 120; + dev2.write<int>("MyModule/readBack.DUMMY_WRITEABLE", 120); trigger.write(); CHECK_TIMEOUT(readback2.readNonBlocking(), 10000); // device 2 still works BOOST_CHECK_EQUAL(readback2, 120); @@ -476,6 +479,9 @@ BOOST_AUTO_TEST_CASE(testShutdown) { TestApplication2 app; ctk::TestFacility test(false); // test facility without testable mode + ctk::Device dev2(ExceptionDummyCDD2); + ctk::Device dev3(ExceptionDummyCDD3); + // Non zero defaults set here to avoid race conditions documented in // https://github.com/ChimeraTK/ApplicationCore/issues/103 test.setScalarDefault("/Device2/MyModule/actuator", static_cast<int32_t>(DEFAULT)); @@ -496,19 +502,16 @@ BOOST_AUTO_TEST_CASE(testShutdown) { app.realisticModule.mainLoopStarted.wait(); // verify defaults have been written to the device - CHECK_TIMEOUT(dummyBackend2->getRawAccessor("MyModule", "actuator") == static_cast<int32_t>(DEFAULT), 10000); - CHECK_TIMEOUT(dummyBackend2->getRawAccessor("Integers", "signed32") == static_cast<int32_t>(DEFAULT), 10000); - CHECK_TIMEOUT(dummyBackend2->getRawAccessor("Integers", "unsigned32") == static_cast<uint32_t>(DEFAULT), 10000); - CHECK_TIMEOUT(dummyBackend2->getRawAccessor("Integers", "signed16") == static_cast<int16_t>(DEFAULT), 10000); - CHECK_TIMEOUT(dummyBackend2->getRawAccessor("Integers", "unsigned16") == static_cast<uint16_t>(DEFAULT), 10000); - CHECK_TIMEOUT(dummyBackend2->getRawAccessor("Integers", "signed8") == 
static_cast<int8_t>(DEFAULT), 10000); - CHECK_TIMEOUT(dummyBackend2->getRawAccessor("Integers", "unsigned8") == static_cast<uint8_t>(DEFAULT), 10000); - CHECK_TIMEOUT(dummyBackend2->getRawAccessor("FixedPoint", "value") == 14080, 10000); - CHECK_TIMEOUT( - dummyBackend2->getRawAccessor("Deep/Hierarchies/Need/Tests/As", "well") == static_cast<int32_t>(DEFAULT), 10000); - CHECK_TIMEOUT( - dummyBackend2->getRawAccessor("Deep/Hierarchies/Need/Another", "test") == static_cast<int32_t>(DEFAULT), 10000); - CHECK_TIMEOUT(dummyBackend3->getRawAccessor("MODULE", "REG4") == static_cast<int32_t>(DEFAULT), 10000); + CHECK_TIMEOUT(dev2.read<int32_t>("MyModule/actuator") == DEFAULT, 10000); + CHECK_TIMEOUT(dev2.read<int32_t>("Integers/signed32") == DEFAULT, 10000); + CHECK_TIMEOUT(dev2.read<uint32_t>("Integers/unsigned32") == DEFAULT, 10000); + CHECK_TIMEOUT(dev2.read<int16_t>("Integers/signed16") == DEFAULT, 10000); + CHECK_TIMEOUT(dev2.read<uint16_t>("Integers/unsigned16") == DEFAULT, 10000); + CHECK_TIMEOUT(dev2.read<int8_t>("Integers/signed8") == DEFAULT, 10000); + CHECK_TIMEOUT(dev2.read<uint8_t>("Integers/unsigned8") == DEFAULT, 10000); + CHECK_TIMEOUT(dev2.read<int32_t>("Deep/Hierarchies/Need/Tests/As/well") == DEFAULT, 10000); + CHECK_TIMEOUT(dev2.read<int32_t>("Deep/Hierarchies/Need/Another/test") == DEFAULT, 10000); + CHECK_TIMEOUT(dev3.read<int32_t>("MODULE/REG4") == DEFAULT, 10000); // Wait for the devices to come up. CHECK_EQUAL_TIMEOUT( diff --git a/tests/executables_src/testPropagateDataFaultFlag.cc b/tests/executables_src/testPropagateDataFaultFlag.cc index 578cbc0643635fd55920567d9978f4d11dc26a0d..f75051b01579861bec4872167937a9ddb2e146b1 100644 --- a/tests/executables_src/testPropagateDataFaultFlag.cc +++ b/tests/executables_src/testPropagateDataFaultFlag.cc @@ -569,7 +569,8 @@ struct Fixture_noTestableMode { test.runApplication(); // Making sure the default is written to the device before proceeding. 
- CHECK_EQUAL_TIMEOUT(int(device1DummyBackend->getRawAccessor("m1", "o1")), DEFAULT, 10000); + auto m1o1 = device1DummyBackend->getRegisterAccessor<int>("m1/o1", 1, 0, false); + CHECK_EQUAL_TIMEOUT((m1o1->read(), m1o1->accessData(0)), DEFAULT, 10000); } ~Fixture_noTestableMode() {