diff --git a/CHANGELOG.md b/CHANGELOG.md index 79f4b08fe05b37d4bb04df1f58dc11fc184110b7..6c1a6967e238149606e5554efdf65d53214c8e92 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,15 +1,18 @@ -## 22.03.0 (in progress) +## 22.03.0 FEATURES * Monitoring: Added detailed monitoring and pipeline visualization +* Consumer API: return kDataNotInCache/AsapoDataNotInCacheError error if data is not in cache and cannot be on disk (due to the ingest mode producer used) IMPROVEMENTS * renamed and hid C++ macros from client code -* Consumer API: return kDataNotInCache/AsapoDataNotInCacheError error if data is not in cache and cannot be on disk (due to the ingest mode producer used) BUG FIXES * Producer API: fixed bug segfault in Python code when sending data object which memory is from some other object +VERSION COMPATIBILITY +* Previous C consumer & producer clients will break due to two extra parameters for instance id and pipeline step id in *asapo_create_source_credentials*. + INTERNAL * Do not return error when receiver cannot get slot in shared cache - just allocate own memory slot diff --git a/docs/site/changelog/2022-03-02-22.03.0.md b/docs/site/changelog/2022-03-02-22.03.0.md new file mode 100644 index 0000000000000000000000000000000000000000..6f841f3d1f1e7343bf7b39190206e07a21648e16 --- /dev/null +++ b/docs/site/changelog/2022-03-02-22.03.0.md @@ -0,0 +1,24 @@ +--- +title: Version 22.03.0 +author: Sergey Yakubov +author_title: DESY IT +tags: [release] +--- + +# Changelog for version 22.03.0 + +FEATURES +* Monitoring: Added detailed monitoring and pipeline visualization +* Consumer API: return kDataNotInCache/AsapoDataNotInCacheError error if data is not in cache and cannot be on disk (due to the ingest mode producer used) + +IMPROVEMENTS +* renamed and hid C++ macros from client code + +BUG FIXES +* Producer API: fixed a segfault in Python code when sending a data object whose memory is owned by another object + +VERSION COMPATIBILITY +* Previous C consumer & producer clients will break due to two extra parameters for instance id and pipeline step id in *asapo_create_source_credentials*. 
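The same two identifiers also appear as extra trailing arguments in the Python client factory calls, as the updated examples further down in this diff show. A minimal sketch of creating a consumer with the new arguments (endpoint, path, beamtime and token values are placeholders; the argument order follows the updated consume.py example):

```python
import asapo_consumer

endpoint = "localhost:8400"        # placeholder ASAPO endpoint
path_to_files = "/var/tmp/asapo/global_shared/data/test_facility/gpfs/test/2019/data/asapo_test"
beamtime = "asapo_test"            # placeholder beamtime id
token = "<beamtime token>"         # placeholder access token

# The last two arguments are new in 22.03.0: the consumer instance id and the
# pipeline step id. Both can be set to 'auto' to let ASAPO choose them.
consumer = asapo_consumer.create_consumer(endpoint,
                                          path_to_files,
                                          True,                      # data is accessible via the filesystem
                                          beamtime,
                                          "test_source",             # data source name
                                          token,
                                          5000,                      # timeout in ms
                                          "test_consumer_instance",
                                          "pipeline_step_1")
```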
+ +INTERNAL +* Do not return error when receiver cannot get slot in shared cache - just allocate own memory slot diff --git a/docs/site/docs/consumer-clients.md b/docs/site/docs/consumer-clients.md index d04c14034585b2ef69f335b0104b2bfafddf69d1..eb5c635d13814dbca51617ae52d5cdbc6abca1fe 100644 --- a/docs/site/docs/consumer-clients.md +++ b/docs/site/docs/consumer-clients.md @@ -10,6 +10,7 @@ Consumer API is available for C++ and Python and has the following main function - Create a consumer instance and bind it to a specific beamtime and data source - multiple instances can be created (also within a single application) to receive data from different sources + - an instance id and pipeline step id can be set to allow pipeline monitoring - a beamtime token is used for access control - If needed (mainly for get_next_XX operations), create a consumer group that allows to process messages independently from other groups - Receive messages from a specific stream (you can read more [here](data-in-asapo) about data in ASAPO) diff --git a/docs/site/docs/getting-started.mdx b/docs/site/docs/getting-started.mdx index 6a45728cd173f631acb0c3f0355a85408d2b2454..f738421eeb764724dd32f0e29d08b8de029f0189 100644 --- a/docs/site/docs/getting-started.mdx +++ b/docs/site/docs/getting-started.mdx @@ -47,7 +47,7 @@ unix socket or a tcp port for communications) at the end you should see -<p className="green-text"><strong>Apply complete! Resources: 19 added, 0 changed, 0 destroyed.</strong></p> +<p className="green-text"><strong>Apply complete! Resources: 25 added, 0 changed, 0 destroyed.</strong></p> which means ASAPO services successfully started. Your ASAPO endpoint for API calls will be **localhost:8400**. diff --git a/docs/site/docs/producer-clients.md b/docs/site/docs/producer-clients.md index d74adb0b60e04ae6cf0ab5b9c2b264a25c46419a..89b421a9960e77bf8495e59b3595ac39e4fb5dea 100644 --- a/docs/site/docs/producer-clients.md +++ b/docs/site/docs/producer-clients.md @@ -9,7 +9,7 @@ Producer client (or producer) is a part of a distributed streaming system that i Producer API is available for C++ and Python and has the following main functionality: - Create a producer instance and bind it to a specific beamtime and data source -multiple instances can be created (also within a single application) to send data from different sources +multiple instances can be created (also within a single application) to send data from different sources. An instance id and pipeline step id can be set to allow pipeline monitoring. - Send messages to a specific stream (you can read more [here](data-in-asapo) about data in ASAPO) - each message must have a consecutive integer index, ASAPO does not create indexes automatically - to compose datasets, dataset substream (and dataset size) should be send along with each message diff --git a/docs/site/examples/c/consume.c b/docs/site/examples/c/consume.c index 6d00974d168528e7478bd158cfac7a7852c4bc28..e6cfb0957044aecd7ec21b297d808898650af18b 100644 --- a/docs/site/examples/c/consume.c +++ b/docs/site/examples/c/consume.c @@ -26,7 +26,7 @@ int main(int argc, char* argv[]) { const char * path_to_files = "/var/tmp/asapo/global_shared/data/test_facility/gpfs/test/2019/data/asapo_test"; //set it according to your configuration. 
AsapoSourceCredentialsHandle cred = asapo_create_source_credentials(kProcessed, - "auto", "auto", beamtime, + "test_consumer_instance", "pipeline_step_1", beamtime, "", "test_source", token); AsapoConsumerHandle consumer = asapo_create_consumer(endpoint, path_to_files, 1, diff --git a/docs/site/examples/cpp/consume.cpp index f13db95c0520395cd781660459a4557374d1460b..41b48881e9b2d831756c7788d83d2a6f16c42314 100644 --- a/docs/site/examples/cpp/consume.cpp +++ b/docs/site/examples/cpp/consume.cpp @@ -29,11 +29,13 @@ int main(int argc, char* argv[]) { auto credentials = asapo::SourceCredentials { + "test_consumer_instance", // # consumer instance id (can be "auto") + "pipeline_step_1", // # pipeline step id asapo::SourceType::kProcessed, // should be kProcessed or kRaw, kProcessed writes to the core FS beamtime, // the folder should exist "", // can be empty or "auto", if beamtime_id is given "test_source", // source - token // athorization token + token // authorization token }; auto consumer = asapo::ConsumerFactory::CreateConsumer diff --git a/docs/site/examples/cpp/produce.cpp index 157d53d060d731827ec42c751f54f7aa310f268f..d47f324c38f30b2ce209500b02c18a4a0a0cf588 100644 --- a/docs/site/examples/cpp/produce.cpp +++ b/docs/site/examples/cpp/produce.cpp @@ -34,11 +34,13 @@ int main(int argc, char* argv[]) { auto credentials = asapo::SourceCredentials { + "test_producer_instance", // # producer instance id (can be "auto") + "pipeline_step_1", // # pipeline step id asapo::SourceType::kProcessed, // should be kProcessed or kRaw, kProcessed writes to the core FS beamtime, // the folder should exist "", // can be empty or "auto", if beamtime_id is given "test_source", // source - "" // athorization token + "" // authorization token }; auto producer = asapo::Producer::Create(endpoint, diff --git a/docs/site/examples/python/consume.py index 6180fef9ee83e0a37d85ae338d322fcc5248a41a..1b095f6d3d72fc734a3d2208212e75efe8fb2d36 100644 --- a/docs/site/examples/python/consume.py +++ b/docs/site/examples/python/consume.py @@ -22,7 +22,10 @@ consumer = asapo_consumer \ beamtime, # Same as for the producer "test_source", # Same as for the producer token, # Access token - 5000) # Timeout. How long do you want to wait on non-finished stream for a message. + 5000, # Timeout. How long do you want to wait on non-finished stream for a message. + 'test_consumer_instance', # consumer instance id (can be 'auto') + 'pipeline_step_1' # pipeline step id + ) #create snippet_end #list snippet_start diff --git a/docs/site/examples/python/produce.py index 262015b25fd99be947f1756222d6a74a1bb54acb..badc65110e05d525fec61adb2818efc4c44fb0c3 100644 --- a/docs/site/examples/python/produce.py +++ b/docs/site/examples/python/produce.py @@ -23,9 +23,12 @@ producer = asapo_producer \ beamtime, # the folder should exist 'auto', # can be 'auto', if beamtime_id is given 'test_source', # source - '', # athorization token + '', # authorization token 1, # number of threads. Increase, if the sending speed seems slow - 60000) # timeout. Do not change. + 60000, # timeout. Do not change. + 'test_producer_instance', # producer instance id (can be 'auto') + 'pipeline_step_1' # pipeline step id + ) producer.set_log_level("error") # other values are "warning", "info" or "debug". 
# create snippet_end diff --git a/docs/site/examples/start_asapo_socket.sh b/docs/site/examples/start_asapo_socket.sh index 0150a44a8898ad3cc282decdb4f99f52ab82641e..6acf518525f7c250537249417e415177927a583a 100644 --- a/docs/site/examples/start_asapo_socket.sh +++ b/docs/site/examples/start_asapo_socket.sh @@ -35,4 +35,4 @@ docker run --privileged --rm -v /var/run/docker.sock:/var/run/docker.sock \ --name asapo --net=host -d yakser/asapo-cluster-dev:100.0.develop sleep 15 -docker exec asapo jobs-start -var elk_logs=false -var influxdb_version=1.8.4 +docker exec asapo jobs-start diff --git a/docs/site/examples/start_asapo_tcp.sh b/docs/site/examples/start_asapo_tcp.sh index 4debb8e82089a4dc0d4bdb9fa744c41afa2b67f4..be538e088a9ce51cc5e5749a8ee37c61f42621ca 100644 --- a/docs/site/examples/start_asapo_tcp.sh +++ b/docs/site/examples/start_asapo_tcp.sh @@ -44,4 +44,4 @@ docker run --privileged --userns=host --security-opt no-new-privileges --rm \ --name asapo --net=host -d yakser/asapo-cluster-dev:100.0.develop sleep 15 -docker exec asapo jobs-start -var elk_logs=false +docker exec asapo jobs-start diff --git a/docs/site/versioned_docs/version-22.03.0/compare-to-others.md b/docs/site/versioned_docs/version-22.03.0/compare-to-others.md new file mode 100644 index 0000000000000000000000000000000000000000..97e0bd7fc30ae8166e3493742164a3bb6044fc31 --- /dev/null +++ b/docs/site/versioned_docs/version-22.03.0/compare-to-others.md @@ -0,0 +1,49 @@ +--- +title: Comparison to Other Solutions +--- + +Here we consider how ASAPO is different from other workflows practiced at DESY. The possible candidates are: + +### Filesystem +Probably the most commonly used approach at the moment. Files are written to the beamline filesystem directly via NFS/SMB mount or by HiDRA and copied to the core filesystem by a copy daemon. A user (software) then reads the files from the filesystem. + +### Filesystem + Kafka +The previous workflow, plus a Kafka instance that produces messages when a file appears in the core filesystem. These messages can then be consumed by user software. + +### HiDRA +HiDRA can work in two modes - either data is transferred via it, or data is written over NFS/SMB mounts and HiDRA monitors a folder in a beamline filesystem. In both cases one can subscribe to HiDRA's data queue to be informed about a new file. + +### ASAPO + +ASAPO does not work with files, but rather with data streams. Behind the scenes it does use files, but in principle what a user sees is a stream of messages, where a message typically consists of metadata and data blobs. A data blob can be a single image, a file with arbitrary content or whatever else, even null. What matters is that whatever goes in at one end appears at the other, and that each message has a consecutive index. These messages must be ingested into an ASAPO data stream in some way (e.g. using the ASAPO Producer API or HiDRA), and the data, if not already there, will be transferred to and stored in the data center. A user can then read the messages from the stream and process them in whatever way they like. + +### Compare by categories + +In the table below we compare the approaches from different points of view and in the [next table](#compare-by-features) we compare the approaches by available features. 
+ +| Category | Filesystem | Filesystem+Kafka | HiDRA | ASAPO | +|------------|------------|------------|------------|------------| +| Data ingest | traditional way - write to disk | same as Filesystem, additionally a message is sent to a Kafka topic with basic file metadata (name, size, creation date, ...) | outperforms data ingest via NFS/SMB, uses ZeroMQ(TCP), saturates 10GE bandwidth. To be tested with 100GE. Same metadata as with Kafka | uses parallel TCP connections & in the future RDMA. Can add arbitrary metadata (JSON format). Can send data to various streams to create an arbitrary data hierarchy (e.g. stream per scan). Saves data to a memory buffer and on disk. | +| Offline analysis | traditional way - read from disk. Need to know the filename to read, usually not a problem. | same as Filesystem, there is no benefit in reading the Kafka queue after all data has arrived | not possible, need to fall back to Filesystem | same as online analysis (see below) | +| Online analysis | no efficient way to recognise that new data is available - periodically read the folder content and compare with the previous state, or periodically check whether a file has appeared? | using a subscription to a "new files" topic, user software can be made aware of new data quite soon and react correspondingly. | one can subscribe to HiDRA's ZeroMQ stream and consume data. If data arrives faster than it can be processed or e.g. user software crashes - data might be skipped. | one can get data from various streams in different ways - get next unprocessed message ordered by index, get last, get by id. Since everything is stored in persistent storage, processing is possible with arbitrarily slow (but also fast) consumers. Resilient to connection loss or consumer crashes. | +| Performance | as good as read/write to disk. Can be an issue, especially with new detectors and 100GE+ networks. 
| as good as read/write to disk + some latency for the file to be written to the beamline filesystem, copied to the core filesystem and a message to go through Kafka. | data is available as soon as it is received by HiDRA. If multiple consumer groups want to read the same data (one consumer is known - the process that writes the file), data will be transferred multiple times, which influences the network performance. | data is available to be consumed as soon as it has arrived and been saved to the beamline filesystem (later this can be optimised by using persistent memory instead). No need to read from disk since it also remains in the memory buffer. | +| Parallelisation | Parallelisation is easily possible e.g. with an MPI library. | Parallelisation is possible if Kafka's topics are partitioned (which is not the case at the moment) | Not out of the box, possible with some changes from the user's side | Parallelisation is easily possible, one can consume data concurrently with different consumers from the same stream. Normally, synchronisation between consumers is not needed, but this might depend on the use case. When configured, data can be resent if not acknowledged during a specified time period. | +| Search/filter | hardly possible, manually parsing some metadata file, using POSIX commands? | same as Filesystem. There is the Kafka SQL query language, which could be used if there were metadata in the messages, which is not the case (yet?). | not possible | one can run a subset of SQL queries on the ingested metadata. | +| General comments | Might be ok for slow detectors and/or a use case without online analysis requirements. Might be the only way to work with legacy applications | Fits well for the cases where software just needs a trigger that some new data has arrived, and where processing order, extra metadata and parallelisation are not that important or are implemented by other means. There is some delay between an image being generated and the event being emitted by Kafka, but it is probably not that significant (maximum a couple of seconds). Might not be appropriate for very fast detectors since it still uses the filesystem to write/read data. | Works quite well to transfer files from the detector to the data center. Also a good candidate for live viewers, where the last available "image" should be displayed. Does not work for offline analysis or for near real-time analysis where image processing can take longer than image taking. | Tries to be a general solution which improves in areas where other approaches do not suffice: single code for offline/near real-time/online analysis, parallelisation, extended metadata, efficient memory/storage management, getting data without access to the filesystem (e.g. from a detector PC without the core filesystem mounted), computational pipelines, ... Everything has its price: user software must be modified to use ASAPO, a wrapper might be needed for legacy software that cannot be modified, the user/beamtime scientist needs to structure the data more carefully - e.g. consecutive indexes must be available for each image, one has to define to which stream to write/read data, what the format of the data is, ... 
| + + +### Compare by features + +| Feature | Filesystem | Filesystem+Kafka | HiDRA | ASAPO | +|------------|------------|------------|------------|------------| +| send metadata with image | No | No | No | Yes | +| get last image | No | No | Yes | Yes | +| get image by id | No | No | No | Yes | +| get image in order | No | No | No | Yes | +| Immediately get informed that a new image has arrived | No | Yes | Yes | Yes | +| access image remotely, without reading filesystem | No | No | Yes, if it is still in buffer | Yes | +| access past images | Yes | Yes | No | Yes | +| need to change user code | No | Yes | Yes | Yes | +| parallelisation | Yes (if user software allows that) | Not out of the box | Not out of the box | Yes | +| legacy applications | Yes | No (wrapper could be a workaround) | No (wrapper could be a workaround) | No (wrapper could be a workaround) | +| transparent restart/continuation of simulations in case e.g. a worker process crashes, also for parallel simulations | Not out of the box | Yes | No | Yes | diff --git a/docs/site/versioned_docs/version-22.03.0/consumer-clients.md b/docs/site/versioned_docs/version-22.03.0/consumer-clients.md new file mode 100644 index 0000000000000000000000000000000000000000..eb5c635d13814dbca51617ae52d5cdbc6abca1fe --- /dev/null +++ b/docs/site/versioned_docs/version-22.03.0/consumer-clients.md @@ -0,0 +1,34 @@ +--- +title: Consumer Clients +--- + +A consumer client (or consumer) is the part of a distributed streaming system that is responsible for processing streams of data created by a producer. It is usually the user's (beamline scientist, detector developer, physicist, ...) responsibility to develop a client for a specific beamline, detector or experiment using the ASAPO Consumer API, and ASAPO's responsibility to make sure data is delivered to consumers in an efficient and reliable way. + + + +The Consumer API is available for C++ and Python and has the following main functionality: + +- Create a consumer instance and bind it to a specific beamtime and data source + - multiple instances can be created (also within a single application) to receive data from different sources + - an instance id and pipeline step id can be set to allow pipeline monitoring + - a beamtime token is used for access control +- If needed (mainly for get_next_XX operations), create a consumer group that allows messages to be processed independently from other groups +- Receive messages from a specific stream (you can read more [here](data-in-asapo) about data in ASAPO) + - GetNext to receive and process messages one after another without needing to know message indexes + - Consumer API returns a message with index 1, then 2, ... as they were set by the producer. + - This also works in parallel, so that the payload is distributed among multiple consumers within the same consumer group or between threads of a single consumer instance. In the parallel case the order of message indexes is not deterministic. + - GetLast to receive the last available message - e.g. for live visualisation + - GetById - get message by index - provides random access +- Make queries based on metadata contained in a message - returns all messages in a stream with specific metadata. A subset of the SQL language is used + + +All of the above functions can return only the metadata part of the message, so that an application can e.g. 
extract the filename and pass it to a 3rd party tool for processing. Alternatively, a function may return the complete message with metadata and data so that the consumer can process it directly. Access to the filesystem where the data is actually stored is not required in this case. + +:::note +In the case of the dataset family of functions, only the list of dataset messages is returned; the data can be retrieved in a separate call. +::: + +Please refer to the [C++](http://asapo.desy.de/cpp/) and [Python](http://asapo.desy.de/python/) documentation for specific details (available from the DESY intranet only). + + + diff --git a/docs/site/versioned_docs/version-22.03.0/cookbook/acknowledgements.mdx b/docs/site/versioned_docs/version-22.03.0/cookbook/acknowledgements.mdx new file mode 100644 index 0000000000000000000000000000000000000000..a1e3810868b5efaf01ea9762b0e9aa0812f7f976 --- /dev/null +++ b/docs/site/versioned_docs/version-22.03.0/cookbook/acknowledgements.mdx @@ -0,0 +1,63 @@ +--- +title: Acknowledgements +--- + + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +While consuming the messages we can issue acknowledgements to denote that the messages were (or were not) processed successfully. + +Here is the snippet that expects 10 sample messages in the default stream. When consuming the messages, message #3 receives a negative acknowledgement, which puts it back in the stream for repeated processing, and messages 5 and 7 remain unacknowledged. On the second attempt message #3 gets acknowledged. + +You can find the full example in the git repository. + +<Tabs + groupId="language" + defaultValue="python" + values={[ + { label: 'Python', value: 'python', }, + { label: 'C++', value: 'cpp', }, + ] +}> +<TabItem value="python"> + +```python content="./versioned_examples/version-22.03.0/python/acknowledgements.py" snippetTag="consume" +``` + +</TabItem> + +<TabItem value="cpp"> + +```cpp content="./versioned_examples/version-22.03.0/cpp/acknowledgements.cpp" snippetTag="consume" +``` + +</TabItem> +</Tabs> + +The list of unacknowledged messages can be accessed at any time. This snippet prints the list of unacknowledged messages. + +<Tabs + groupId="language" + defaultValue="python" + values={[ + { label: 'Python', value: 'python', }, + { label: 'C++', value: 'cpp', }, + ] +}> +<TabItem value="python"> + +```python content="./versioned_examples/version-22.03.0/python/acknowledgements.py" snippetTag="print" +``` + +</TabItem> + +<TabItem value="cpp"> + +```cpp content="./versioned_examples/version-22.03.0/cpp/acknowledgements.cpp" snippetTag="print" +``` + +</TabItem> +</Tabs> + +The output will show the order in which the messages receive their acknowledgements. You may notice that the second acknowledgement of message #3 happens with a delay, which was deliberately chosen. The unacknowledged messages are retrieved separately at the end, after the consumer timeout. diff --git a/docs/site/versioned_docs/version-22.03.0/cookbook/datasets.mdx b/docs/site/versioned_docs/version-22.03.0/cookbook/datasets.mdx new file mode 100644 index 0000000000000000000000000000000000000000..3c465613aa9131693a46bd2231e9bd0550d96f30 --- /dev/null +++ b/docs/site/versioned_docs/version-22.03.0/cookbook/datasets.mdx @@ -0,0 +1,67 @@ +--- +title: Datasets +--- + + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +The messages in the stream can be multi-parted. If you have several producers (e.g. 
sub-detectors) that produce several parts of a single message, you can use datasets to assemble a single message from several parts. + +## Dataset Producer + +Here is the code snippet that can be used to produce a three-parted dataset. The full usable example can be found in the git repository. + +<Tabs + groupId="language" + defaultValue="python" + values={[ + { label: 'Python', value: 'python', }, + { label: 'C++', value: 'cpp', }, + ] +}> +<TabItem value="python"> + +```python content="./versioned_examples/version-22.03.0/python/produce_dataset.py" snippetTag="dataset" +``` + +</TabItem> + +<TabItem value="cpp"> + +```cpp content="./versioned_examples/version-22.03.0/cpp/produce_dataset.cpp" snippetTag="dataset" +``` + +</TabItem> +</Tabs> + +You should see the "successfuly sent" message in the logs, and the file should appear in the corresponding folder (by default in ```/var/tmp/asapo/global_shared/data/test_facility/gpfs/test/2019/data/asapo_test```) + +## Dataset Consumer + +Here is the snippet that can be used to consume a dataset. The full example is also in the git repository. + +<Tabs + groupId="language" + defaultValue="python" + values={[ + { label: 'Python', value: 'python', }, + { label: 'C++', value: 'cpp', }, + ] +}> +<TabItem value="python"> + +```python content="./versioned_examples/version-22.03.0/python/consume_dataset.py" snippetTag="dataset" +``` + +</TabItem> + +<TabItem value="cpp"> + +```cpp content="./versioned_examples/version-22.03.0/cpp/consume_dataset.cpp" snippetTag="dataset" +``` + +</TabItem> +</Tabs> + +The details about the received dataset should appear in the logs, together with the message "stream finished" (if the "finished" flag was sent for the stream). The "stream ended" message will appear for non-finished streams, but may also mean that the stream does not exist (or was deleted). diff --git a/docs/site/versioned_docs/version-22.03.0/cookbook/metadata.mdx b/docs/site/versioned_docs/version-22.03.0/cookbook/metadata.mdx new file mode 100644 index 0000000000000000000000000000000000000000..4cb6e986fc9774643e7d95ffa3c72b749e08ad4c --- /dev/null +++ b/docs/site/versioned_docs/version-22.03.0/cookbook/metadata.mdx @@ -0,0 +1,202 @@ +--- +title: Metadata +--- + + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +You can also store any custom metadata with your beamtime, stream, and each message. This tutorial shows you how you can store, update and access this metadata. The metadata is stored in JSON, and any JSON structure is supported. + +:::info +Since C++ doesn't have built-in JSON support, you'd have to use 3rd party libs if you want JSON parsing. In this tutorial we won't use any JSON parsing for C++, and will treat JSONs as regular strings. Please note that ASAP::O only supports valid JSON, and providing invalid input will result in an error. +::: + + +## Send Metadata + +The following snippet shows how to send the beamtime metadata. + +<Tabs + groupId="language" + defaultValue="python" + values={[ + { label: 'Python', value: 'python', }, + { label: 'C++', value: 'cpp', }, + ] +}> +<TabItem value="python"> + +```python content="./versioned_examples/version-22.03.0/python/metadata.py" snippetTag="beamtime_set" +``` + +</TabItem> + +<TabItem value="cpp"> + + +```cpp content="./versioned_examples/version-22.03.0/cpp/metadata.cpp" snippetTag="beamtime_set" +``` + +</TabItem> +</Tabs> + +Any of this metadata can be updated at any moment. Here is an example of how to do it with the beamtime metadata. 
+ +<Tabs + groupId="language" + defaultValue="python" + values={[ + { label: 'Python', value: 'python', }, + { label: 'C++', value: 'cpp', }, + ] +}> +<TabItem value="python"> + +```python content="./versioned_examples/version-22.03.0/python/metadata.py" snippetTag="beamtime_update" +``` + +</TabItem> + +<TabItem value="cpp"> + + +```cpp content="./versioned_examples/version-22.03.0/cpp/metadata.cpp" snippetTag="beamtime_update" +``` + +</TabItem> +</Tabs> + +The same way the metadata can be set for each stream. + +<Tabs + groupId="language" + defaultValue="python" + values={[ + { label: 'Python', value: 'python', }, + { label: 'C++', value: 'cpp', }, + ] +}> +<TabItem value="python"> + +```python content="./versioned_examples/version-22.03.0/python/metadata.py" snippetTag="stream_set" +``` + +</TabItem> + +<TabItem value="cpp"> + + +```cpp content="./versioned_examples/version-22.03.0/cpp/metadata.cpp" snippetTag="stream_set" +``` + +</TabItem> +</Tabs> + +And for each message + +<Tabs + groupId="language" + defaultValue="python" + values={[ + { label: 'Python', value: 'python', }, + { label: 'C++', value: 'cpp', }, + ] +}> +<TabItem value="python"> + +```python content="./versioned_examples/version-22.03.0/python/metadata.py" snippetTag="message_set" +``` + +</TabItem> + +<TabItem value="cpp"> + + +```cpp content="./versioned_examples/version-22.03.0/cpp/metadata.cpp" snippetTag="message_set" +``` + +</TabItem> +</Tabs> + +## Read Metadata + +Here we will read the beamtime metadata. In this example it will already incorporate the changes we did during the update + +<Tabs + groupId="language" + defaultValue="python" + values={[ + { label: 'Python', value: 'python', }, + { label: 'C++', value: 'cpp', }, + ] +}> +<TabItem value="python"> + +```python content="./versioned_examples/version-22.03.0/python/metadata.py" snippetTag="beamtime_get" +``` + +</TabItem> + +<TabItem value="cpp"> + + +```cpp content="./versioned_examples/version-22.03.0/cpp/metadata.cpp" snippetTag="beamtime_get" +``` + +</TabItem> +</Tabs> + +Same for the stream. + +<Tabs + groupId="language" + defaultValue="python" + values={[ + { label: 'Python', value: 'python', }, + { label: 'C++', value: 'cpp', }, + ] +}> +<TabItem value="python"> + +```python content="./versioned_examples/version-22.03.0/python/metadata.py" snippetTag="stream_get" +``` + +</TabItem> + +<TabItem value="cpp"> + + +```cpp content="./versioned_examples/version-22.03.0/cpp/metadata.cpp" snippetTag="stream_get" +``` + +</TabItem> +</Tabs> + +And for the message. + +<Tabs + groupId="language" + defaultValue="python" + values={[ + { label: 'Python', value: 'python', }, + { label: 'C++', value: 'cpp', }, + ] +}> +<TabItem value="python"> + +```python content="./versioned_examples/version-22.03.0/python/metadata.py" snippetTag="message_get" +``` + +</TabItem> + +<TabItem value="cpp"> + + +```cpp content="./versioned_examples/version-22.03.0/cpp/metadata.cpp" snippetTag="message_get" +``` + +</TabItem> +</Tabs> + +The output will show the metadata retrieved from the beamtime, stream and message. 
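Since every level accepts arbitrary JSON, a small illustration of building the three metadata documents in Python may help; the field names are made up for illustration, and the resulting strings would be passed to the corresponding beamtime/stream/message metadata calls shown in the snippets above:

```python
import json

# Hypothetical metadata documents - any valid JSON structure is accepted.
beamtime_meta = {"proposal": "test-proposal", "sample": {"name": "LaB6", "temperature_k": 295}}
stream_meta = {"scan_type": "rocking_curve", "exposure_s": 0.1}
message_meta = {"motor_position": 42.0, "condition": "ok"}

# ASAPO expects metadata as a JSON string, so the dictionaries are serialized first.
beamtime_meta_json = json.dumps(beamtime_meta)
stream_meta_json = json.dumps(stream_meta)
message_meta_json = json.dumps(message_meta)

print(beamtime_meta_json)
```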
diff --git a/docs/site/versioned_docs/version-22.03.0/cookbook/next_stream.mdx b/docs/site/versioned_docs/version-22.03.0/cookbook/next_stream.mdx new file mode 100644 index 0000000000000000000000000000000000000000..00db7e208a4a5229fa9124f648534f51013d0641 --- /dev/null +++ b/docs/site/versioned_docs/version-22.03.0/cookbook/next_stream.mdx @@ -0,0 +1,61 @@ +--- +title: Stream Finishing +--- + + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +When all the data in the stream is sent, the stream can be finished, and it is possible to set the "next stream" that follows the first one. This tutorial shows how several streams can be chained together in a single consumer by using stream finishing. + +The next stream is set by providing an additional parameter while finishing the stream. + +<Tabs + groupId="language" + defaultValue="python" + values={[ + { label: 'Python', value: 'python', }, + { label: 'C++', value: 'cpp', }, + ] +}> +<TabItem value="python"> + +```python content="./versioned_examples/version-22.03.0/python/next_stream.py" snippetTag="next_stream_set" +``` + +</TabItem> + +<TabItem value="cpp"> + +```cpp content="./versioned_examples/version-22.03.0/cpp/next_stream.cpp" snippetTag="next_stream_set" +``` + +</TabItem> +</Tabs> + +The reading of the streams can then be chained together. When one stream finishes and the next stream is provided, the reading of the next stream can immediately start. This example will read the whole chain of streams, until it encounters a non-finished stream, or a stream that was finished without the ```next``` parameter. + +<Tabs + groupId="language" + defaultValue="python" + values={[ + { label: 'Python', value: 'python', }, + { label: 'C++', value: 'cpp', }, + ] +}> +<TabItem value="python"> + +```python content="./versioned_examples/version-22.03.0/python/next_stream.py" snippetTag="read_stream" +``` + +</TabItem> + +<TabItem value="cpp"> + +```cpp content="./versioned_examples/version-22.03.0/cpp/next_stream.cpp" snippetTag="read_stream" +``` + +</TabItem> +</Tabs> + +The output will show the messages being consumed from the streams in order. For this example (the full file can be found in the git repository) it will be first the ```default``` stream, then the ```next```. diff --git a/docs/site/versioned_docs/version-22.03.0/cookbook/overview.md b/docs/site/versioned_docs/version-22.03.0/cookbook/overview.md new file mode 100644 index 0000000000000000000000000000000000000000..97db96a408a253119c2091af786a8a3bd3820726 --- /dev/null +++ b/docs/site/versioned_docs/version-22.03.0/cookbook/overview.md @@ -0,0 +1,13 @@ +--- +title: Code Examples Overview +--- + +Here you can find the code examples for various common ASAPO use cases. Make sure that the ASAP::O instance and client libraries are properly installed, see the [Getting Started page](../) for details. + +For the most basic use case, see the [Simple Producer](simple-producer) and [Simple Consumer](simple-consumer). There are also basic examples of CMake and makefile configurations for client compilation. + +The API documentation can be found [here](http://asapo.desy.de/cpp) (for C++) or [here](http://asapo.desy.de/python) (for Python). 
+ +:::tip +You can see more examples in the ASAPO [source code](https://stash.desy.de/projects/ASAPO/repos/asapo/browse/examples) +::: diff --git a/docs/site/versioned_docs/version-22.03.0/cookbook/query.mdx b/docs/site/versioned_docs/version-22.03.0/cookbook/query.mdx new file mode 100644 index 0000000000000000000000000000000000000000..25054e8529c2ad63458cb97ed62b0e4f23e936bc --- /dev/null +++ b/docs/site/versioned_docs/version-22.03.0/cookbook/query.mdx @@ -0,0 +1,142 @@ +--- +title: Message query +--- + + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +Messages in streams can be retrieved based on their metadata. Both the technical information (e.g. ID or timestamp) and the user metadata (see [this tutorial](metadata) for details) can be used to make a query. In this tutorial several examples of such queries are shown. The standard SQL syntax is used. + +For this example we expect several messages in the default stream with the metadata consisting of two fields: a string named ```condition``` and an integer named ```somevalue```. Go to the git repository for the full example. + +:::info +Keep in mind that the query requests return only the list of metadata records for the matching messages, not the messages themselves. You need to explicitly retrieve the actual data for each message. +::: + +Here we can pick a message with a specific ID. + +<Tabs + groupId="language" + defaultValue="python" + values={[ + { label: 'Python', value: 'python', }, + { label: 'C++', value: 'cpp', }, + ] +}> +<TabItem value="python"> + +```python content="./versioned_examples/version-22.03.0/python/query.py" snippetTag="by_id" +``` + +</TabItem> + +<TabItem value="cpp"> + +```cpp content="./versioned_examples/version-22.03.0/cpp/query.cpp" snippetTag="by_id" +``` + +</TabItem> +</Tabs> + +We can also use a simple rule for picking a range of IDs. + +<Tabs + groupId="language" + defaultValue="python" + values={[ + { label: 'Python', value: 'python', }, + { label: 'C++', value: 'cpp', }, + ] +}> +<TabItem value="python"> + +```python content="./versioned_examples/version-22.03.0/python/query.py" snippetTag="by_ids" +``` + +</TabItem> + +<TabItem value="cpp"> + +```cpp content="./versioned_examples/version-22.03.0/cpp/query.cpp" snippetTag="by_ids" +``` + +</TabItem> +</Tabs> + +We can query the messages based on their metadata, for example request a specific value of the string field. + +<Tabs + groupId="language" + defaultValue="python" + values={[ + { label: 'Python', value: 'python', }, + { label: 'C++', value: 'cpp', }, + ] +}> +<TabItem value="python"> + +```python content="./versioned_examples/version-22.03.0/python/query.py" snippetTag="string_equal" +``` + +</TabItem> + +<TabItem value="cpp"> + +```cpp content="./versioned_examples/version-22.03.0/cpp/query.cpp" snippetTag="string_equal" +``` + +</TabItem> +</Tabs> + +We can also require some more complex constraints on the metadata, e.g. a range for an integer field. + +<Tabs + groupId="language" + defaultValue="python" + values={[ + { label: 'Python', value: 'python', }, + { label: 'C++', value: 'cpp', }, + ] +}> +<TabItem value="python"> + +```python content="./versioned_examples/version-22.03.0/python/query.py" snippetTag="int_compare" +``` + +</TabItem> + +<TabItem value="cpp"> + +```cpp content="./versioned_examples/version-22.03.0/cpp/query.cpp" snippetTag="int_compare" +``` + +</TabItem> +</Tabs> + +Since every message comes with a timestamp, we can make constraints on it as well. 
For example, request all the messages from the last 15 minutes. + +<Tabs + groupId="language" + defaultValue="python" + values={[ + { label: 'Python', value: 'python', }, + { label: 'C++', value: 'cpp', }, + ] +}> +<TabItem value="python"> + +```python content="./versioned_examples/version-22.03.0/python/query.py" snippetTag="timestamp" +``` + +</TabItem> + +<TabItem value="cpp"> + +```cpp content="./versioned_examples/version-22.03.0/cpp/query.cpp" snippetTag="timestamp" +``` + +</TabItem> +</Tabs> + +The output of the full example will show the message selection together with the conditions used for selection. diff --git a/docs/site/versioned_docs/version-22.03.0/cookbook/simple-consumer.mdx b/docs/site/versioned_docs/version-22.03.0/cookbook/simple-consumer.mdx new file mode 100644 index 0000000000000000000000000000000000000000..9152225de7fe2c4e7678e523de7f1ece9990f1c2 --- /dev/null +++ b/docs/site/versioned_docs/version-22.03.0/cookbook/simple-consumer.mdx @@ -0,0 +1,195 @@ +--- +title: Simple Consumer +--- + + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +This example shows how to consume a message. This page provides snippets for a simple consumer. You can go to BitBucket to see the whole example at once. The files there are a working example ready to launch. + +A special access token is needed to create a consumer. For the purpose of this tutorial a special "test" token is used. It will only work for the beamtime called "asapo_test". + +The first step is to create an instance of the consumer. + +<Tabs + groupId="language" + defaultValue="python" + values={[ + { label: 'Python', value: 'python', }, + { label: 'C++', value: 'cpp', }, + { label: 'C', value: 'c', }, + ] +}> +<TabItem value="python"> + +```python content="./versioned_examples/version-22.03.0/python/consume.py" snippetTag="create" +``` + +</TabItem> + +<TabItem value="cpp"> + +```cpp content="./versioned_examples/version-22.03.0/cpp/consume.cpp" snippetTag="create" +``` + +</TabItem> + +<TabItem value="c"> + +```c content="./versioned_examples/version-22.03.0/c/consume.c" snippetTag="create" +``` + +</TabItem> + +</Tabs> + +You can list all the streams within the beamtime. + +<Tabs + groupId="language" + defaultValue="python" + values={[ + { label: 'Python', value: 'python', }, + { label: 'C++', value: 'cpp', }, + { label: 'C', value: 'c', }, + ] +}> +<TabItem value="python"> + +```python content="./versioned_examples/version-22.03.0/python/consume.py" snippetTag="list" +``` + +</TabItem> + +<TabItem value="cpp"> + +```cpp content="./versioned_examples/version-22.03.0/cpp/consume.cpp" snippetTag="list" +``` + +</TabItem> + +</Tabs> + +The actual consuming of the message will probably be done in a loop. Here is an example of how such a loop could be organized. It will run until the stream is finished, or no new messages are received within the timeout. + +You need to use a group ID, which can be shared by several consumers working in parallel. You can either generate one or use a random string. 
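Before the full snippets, here is a minimal sketch of such a loop in Python. It assumes the consumer object created above; the exact get_next signature and error class names should be checked against the Python API reference:

```python
import asapo_consumer   # needed here for the error classes

group_id = consumer.generate_group_id()   # one group id, shared by cooperating consumers

while True:
    try:
        # get_next returns the data blob and its metadata; meta_only=True would skip the data
        data, meta = consumer.get_next(group_id, meta_only=False)
        print("processing message", meta["_id"], meta["name"])
    except asapo_consumer.AsapoStreamFinishedError:
        print("stream finished")          # the producer marked the stream as finished
        break
    except asapo_consumer.AsapoEndOfStreamError:
        print("stream ended")             # no new messages within the timeout
        break
```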
+ +<Tabs + groupId="language" + defaultValue="python" + values={[ + { label: 'Python', value: 'python', }, + { label: 'C++', value: 'cpp', }, + { label: 'C', value: 'c', }, + ] +}> +<TabItem value="python"> + +```python content="./versioned_examples/version-22.03.0/python/consume.py" snippetTag="consume" +``` + +</TabItem> + +<TabItem value="cpp"> + +```cpp content="./versioned_examples/version-22.03.0/cpp/consume.cpp" snippetTag="consume" +``` + +</TabItem> + +<TabItem value="c"> + +```c content="./versioned_examples/version-22.03.0/c/consume.c" snippetTag="consume" +``` + +</TabItem> + +</Tabs> + +After consuming the stream you can delete it. + +<Tabs + groupId="language" + defaultValue="python" + values={[ + { label: 'Python', value: 'python', }, + { label: 'C++', value: 'cpp', }, + { label: 'C', value: 'c', }, + ] +}> +<TabItem value="python"> + +```python content="./versioned_examples/version-22.03.0/python/consume.py" snippetTag="delete" +``` + +</TabItem> + +<TabItem value="cpp"> + +```cpp content="./versioned_examples/version-22.03.0/cpp/consume.cpp" snippetTag="delete" +``` + +</TabItem> + +<TabItem value="c"> + +```c content="./versioned_examples/version-22.03.0/c/consume.c" snippetTag="delete" +``` + +</TabItem> + +</Tabs> + +<Tabs + groupId="language" + defaultValue="python" + values={[ + { label: 'Python', value: 'python', }, + { label: 'C++', value: 'cpp', }, + { label: 'C', value: 'c', }, + ] +}> +<TabItem value="python"> +For the Python example just launch it with the Python interpreter (be sure that the ASAP::O client Python modules are installed) + +``` +$ python3 consumer.py +``` + +</TabItem> + +<TabItem value="cpp"> +For the C++ example you need to compile it first. The easiest way to do it is by installing the ASAP::O client dev packages and using the CMake find_package function. CMake will generate the makefile that you can then use to compile the example. + +The example CMake file can look like this + +```cmake content="./versioned_examples/version-22.03.0/cpp/CMakeLists.txt" snippetTag="#consumer" +``` + +You can use it like this + +```bash +$ cmake . && make +$ ./asapo-consume +``` + +</TabItem> + +<TabItem value="c"> +Compile e.g. using Makefile and pkg-config (although we recommend CMake - see C++ section) and execute. This example assumes asapo is installed to /opt/asapo. Adjust correspondingly. + +```makefile content="./versioned_examples/version-22.03.0/c/Makefile" snippetTag="#consumer" +``` + +``` +$ make +$ ./asapo-consume +``` + + +</TabItem> + +</Tabs> + +The details about the received message should appear in the logs, together with the message "stream finished" (if the "finished" flag was sent for the stream). The "stream ended" message will appear for non-finished streams, but may also mean that the stream does not exist (or was deleted). diff --git a/docs/site/versioned_docs/version-22.03.0/cookbook/simple-pipeline.mdx b/docs/site/versioned_docs/version-22.03.0/cookbook/simple-pipeline.mdx new file mode 100644 index 0000000000000000000000000000000000000000..f299c68c59ebf73e2279c28bd2bf061728e266e9 --- /dev/null +++ b/docs/site/versioned_docs/version-22.03.0/cookbook/simple-pipeline.mdx @@ -0,0 +1,61 @@ +--- +title: Simple Pipeline +--- + + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +A consumer and a producer can be combined together in order to create pipelines. Look at the corresponding examples to learn about producers and consumers in detail. + +Here is the snippet that shows how to organize a pipelined loop. 
The full runnable example can be found in the git repository. + +<Tabs + groupId="language" + defaultValue="python" + values={[ + { label: 'Python', value: 'python', }, + { label: 'C++', value: 'cpp', }, + ] +}> +<TabItem value="python"> + +```python content="./versioned_examples/version-22.03.0/python/pipeline.py" snippetTag="pipeline" +``` + +</TabItem> + +<TabItem value="cpp"> + +```cpp content="./versioned_examples/version-22.03.0/cpp/pipeline.cpp" snippetTag="pipeline" +``` + +</TabItem> +</Tabs> + +Just like with any produced stream, the pipelined stream can be marked as "finished". Here's the snippet that shows how to access the last message id in the stream. + +<Tabs + groupId="language" + defaultValue="python" + values={[ + { label: 'Python', value: 'python', }, + { label: 'C++', value: 'cpp', }, + ] +}> +<TabItem value="python"> + +```python content="./versioned_examples/version-22.03.0/python/pipeline.py" snippetTag="finish" +``` + +</TabItem> + +<TabItem value="cpp"> + +```cpp content="./versioned_examples/version-22.03.0/cpp/pipeline.cpp" snippetTag="finish" +``` + +</TabItem> +</Tabs> + +The details about the received message should appear in the logs, together with the message "stream finished" (if the "finished" flag was sent for the stream). The "stream ended" message will appear for non-finished streams, but may also mean that the stream does not exist (or was deleted). The processed file should appear in the corresponding folder (by default in ```/var/tmp/asapo/global_shared/data/test_facility/gpfs/test/2019/data/asapo_test```) diff --git a/docs/site/versioned_docs/version-22.03.0/cookbook/simple-producer.mdx b/docs/site/versioned_docs/version-22.03.0/cookbook/simple-producer.mdx new file mode 100644 index 0000000000000000000000000000000000000000..b839ed3487ed07baceeb22a4e1e2b2f50d959fcc --- /dev/null +++ b/docs/site/versioned_docs/version-22.03.0/cookbook/simple-producer.mdx @@ -0,0 +1,148 @@ +--- +title: Simple Producer +--- + + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +This example produces a simple message. This page provides snippets for a simple producer for both Python and C++. You can go to BitBucket to see the whole example at once. The files there are a working example ready to launch. + +The first step is to create an instance of the producer. + +<Tabs + groupId="language" + defaultValue="python" + values={[ + { label: 'Python', value: 'python', }, + { label: 'C++', value: 'cpp', }, + ] +}> +<TabItem value="python"> + +```python content="./versioned_examples/version-22.03.0/python/produce.py" snippetTag="create" +``` + +</TabItem> + +<TabItem value="cpp"> + +```cpp content="./versioned_examples/version-22.03.0/cpp/produce.cpp" snippetTag="create" +``` + +</TabItem> +</Tabs> + +Then, we need to define a callback that will be used for sending. The callback is called when the message is actually sent, which may happen with a delay. + +<Tabs + groupId="language" + defaultValue="python" + values={[ + { label: 'Python', value: 'python', }, + { label: 'C++', value: 'cpp', }, + ] +}> +<TabItem value="python"> + +```python content="./versioned_examples/version-22.03.0/python/produce.py" snippetTag="callback" +``` + +</TabItem> + +<TabItem value="cpp"> + +```cpp content="./versioned_examples/version-22.03.0/cpp/produce.cpp" snippetTag="callback" +``` + +</TabItem> +</Tabs> + +Next we schedule the actual sending. This function call does not perform the actual sending, only schedules it. 
The sending will happen in the background, and when it is done the callback will be called (if provided). + +<Tabs + groupId="language" + defaultValue="python" + values={[ + { label: 'Python', value: 'python', }, + { label: 'C++', value: 'cpp', }, + ] +}> +<TabItem value="python"> + +```python content="./versioned_examples/version-22.03.0/python/produce.py" snippetTag="send" +``` + +</TabItem> + +<TabItem value="cpp"> + +```cpp content="./versioned_examples/version-22.03.0/cpp/produce.cpp" snippetTag="send" +``` + +</TabItem> +</Tabs> + +The sending of the messages will probably be done in a loop. After all the data is sent, some additional actions might be done. You may want to wait for all the background requests to be finished before doing something else or exiting the application. + +<Tabs + groupId="language" + defaultValue="python" + values={[ + { label: 'Python', value: 'python', }, + { label: 'C++', value: 'cpp', }, + ] +}> +<TabItem value="python"> + +```python content="./versioned_examples/version-22.03.0/python/produce.py" snippetTag="finish" +``` + +</TabItem> + +<TabItem value="cpp"> + +```cpp content="./versioned_examples/version-22.03.0/cpp/produce.cpp" snippetTag="finish" +``` + +</TabItem> +</Tabs> + +You can get the full example from BitBucket and test it locally. + +<Tabs + groupId="language" + defaultValue="python" + values={[ + { label: 'Python', value: 'python', }, + { label: 'C++', value: 'cpp', }, + ] +}> +<TabItem value="python"> +For the Python example just launch it with the Python interpreter (be sure that the ASAP::O client Python modules are installed). + +```bash +$ python3 produce.py +``` + +</TabItem> + +<TabItem value="cpp"> +For the C++ example you need to compile it first. The easiest way to do it is by installing the ASAP::O client dev packages and using the CMake find_package function. CMake will generate the makefile that you can then use to compile the example. + +The example CMake file can look like this. + +```cmake content="./versioned_examples/version-22.03.0/cpp/CMakeLists.txt" snippetTag="#producer" +``` + +You can use it like this. + +```bash +$ cmake . && make +$ ./asapo-produce +``` + +</TabItem> +</Tabs> + +You should see the "successfuly sent" message in the logs, and the file should appear in the corresponding folder (by default in ```/var/tmp/asapo/global_shared/data/test_facility/gpfs/test/2019/data/asapo_test```). diff --git a/docs/site/versioned_docs/version-22.03.0/core-architecture.md b/docs/site/versioned_docs/version-22.03.0/core-architecture.md new file mode 100644 index 0000000000000000000000000000000000000000..a02048e96d33c687f38622661b735974541a36e4 --- /dev/null +++ b/docs/site/versioned_docs/version-22.03.0/core-architecture.md @@ -0,0 +1,29 @@ +--- +title: Core Architecture +--- + +For those who are curious about the ASAPO architecture, the diagram shows some details. The numbered arrows give an example of the data workflow explained below. + + + +## Data Workflow (example) +The workflow can be split into two more or less independent tasks - data ingestion and data retrieval. + +### Data ingestion (numbers with i on the diagram) +1i) As we [know](producer-clients.md), the producer client is responsible for ingesting data into the system. Therefore the first step is to detect that a new message is available. This can be done using another tool developed at DESY named [HiDRA](https://confluence.desy.de/display/FSEC/HiDRA). This tool monitors the source of data (e.g. 
by monitoring a filesystem or using HTTP requests or ZeroMQ streams, depending on the detector type) + +2i) HiDRA (or another user application) then uses the ASAPO Producer API to send messages (M1 and M2 in our case) in parallel to the ASAPO Receiver. TCP/IP or RDMA protocols are used to send data most efficiently. The ASAPO Receiver receives the data into a memory cache + +3i) - 4i) ASAPO saves the data to a filesystem and adds a metadata record to a database + +5i) Feedback is sent to the producer client with a success or error message (in case of an error, some of the steps above may not happen) + +### Data retrieval (numbers with r on the diagram) + +The [consumer client](consumer-clients.md) is usually a user application that retrieves data from the system to analyse/process it. + +The first step to retrieve a message via the Consumer API is to pass the request to the Data Broker (1r). The Data Broker retrieves the metadata information about the message from the database (2r) and returns it to the Consumer Client. The Consumer Client analyses the metadata information and decides how to get the data. If the data is still in the Receiver memory cache, the client requests the data from there via a Data Server (which is a part of the ASAPO Receiver). Otherwise, the client gets the data from the filesystem - directly if the filesystem is accessible on the machine where the client is running, or via the File Transfer Service if not. + + + + diff --git a/docs/site/versioned_docs/version-22.03.0/data-in-asapo.md b/docs/site/versioned_docs/version-22.03.0/data-in-asapo.md new file mode 100644 index 0000000000000000000000000000000000000000..96abf43c86c13873641ea767b7766e3145baa5d3 --- /dev/null +++ b/docs/site/versioned_docs/version-22.03.0/data-in-asapo.md @@ -0,0 +1,29 @@ +--- +title: Data in ASAPO +--- +All data that is produced, stored and consumed via ASAPO is structured on several levels. + +#### Beamtime +This is the top level. It contains all data collected/produced during a single beamtime (Beamtime is the term used at DESY. It can also be a Run, Experiment, Proposal, ...). Each beamtime has its own unique ID. + +#### Data Source +During a beamtime, data can be produced by different sources. For example, a detector is a data source; if multiple detectors are used during an experiment, they can be different data sources or the same data source (more details below in the datasets section). A user application that simulates or analyses data can also act as an ASAPO data source. Each data source has its own unique name within a beamtime. + +#### Data Stream +Each data source can emit multiple data streams. Each stream has a name that is unique within a specific data source. + +#### Message +Data streams consist of smaller entities - messages. The content of a message is quite flexible, to be able to cover a broad range of use cases. Usually it is metadata plus some binary data (e.g. a detector image, or an hdf5 file with multiple images). At the moment ASAPO itself is agnostic to the data and sees it as a binary array. Later some specific cases might be handled as well (the most prominent use case - an hdf5 file with multiple images). + +An important aspect is that each message within a data stream must be assigned a consecutive integer index. Therefore, a stream always contains messages with index = 1,2,3 ... . This is different from traditional messaging systems where messages have timestamps or arbitrary unique hash IDs. 
The reason is that with timestamps the order of messages saved in the system might differ from the order they were generated by the data source (e.g. a detector), and keeping the correct order is required in many cases. The second reason is that it makes random access to a specific message quite straightforward. + +#### Datasets/Dataset substreams +In some cases multiple detectors are used during an experiment. E.g. a 3D image is composed from multiple 2D images created by different detectors. In this case these 2D images can be composed into a dataset so that it can be processed later as a whole. One would then use a single data source (which would mean a set of detectors or a "multi-detector" in this case) and a single data stream, and, to compose a dataset, for each of its components (each 2D image in our example) the corresponding detector would send a message with the same id but to a different dataset substream. + +So, for the case without datasets (single detector) the data hierarchy is Beamtime→Data Source → Data Stream → Message: + + + +And with datasets (multi-detector) the data hierarchy is Beamtime→Data Source → Data Stream → Dataset→ Message in Dataset Substream: + + diff --git a/docs/site/versioned_docs/version-22.03.0/getting-started.mdx b/docs/site/versioned_docs/version-22.03.0/getting-started.mdx new file mode 100644 index 0000000000000000000000000000000000000000..8eb725bf56547438a1c24517f1e546421ff7e1de --- /dev/null +++ b/docs/site/versioned_docs/version-22.03.0/getting-started.mdx @@ -0,0 +1,117 @@ +--- +title: Getting Started +slug: / +--- + + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +## Start ASAPO services {#step-1} + +If you already have running ASAPO services and know the endpoint, you don't need this, and can go to [Client Libraries](#step-2). + +Otherwise, for testing purposes one can start ASAPO services in a standalone mode (this is not recommended for production deployment). + + +The easiest way is to use a Docker container. +So, make sure Docker is installed and you have the necessary permissions to use it. +Please note that this will only work on a Linux machine. Also please note that ASAPO needs some ports to be available. You can check the list +[here](https://stash.desy.de/projects/ASAPO/repos/asapo/browse/deploy/asapo_services/scripts/asapo.auto.tfvars.in#37). + +Now, depending on how your Docker daemon is configured (if it uses a +unix socket or a tcp port for communications) + you can pick the corresponding script below, adjust it and execute it to start the ASAPO services. + +<Tabs + defaultValue="unix" + values={[ + { label: 'Docker with unix socket (default)', value: 'unix', }, + { label: 'Docker with tcp (used on FS machines)', value: 'tcp', }, + ] +}> +<TabItem value="unix"> + +```shell content="./versioned_examples/version-22.03.0/start_asapo_socket.sh" +``` + +</TabItem> + +<TabItem value="tcp"> + +```shell content="./versioned_examples/version-22.03.0/start_asapo_tcp.sh" +``` + +</TabItem> +</Tabs> + +At the end you should see + +<p className="green-text"><strong>Apply complete! Resources: 25 added, 0 changed, 0 destroyed.</strong></p> + +which means the ASAPO services successfully started. Your ASAPO endpoint for API calls will be **localhost:8400**. + +### Create data directories + +Next, you need to create directories where ASAPO will store the data +(the structure matches the one used at DESY experiments). 
+ +Since we are going to use beamline `test` and beamtime `asapo_test` in the following examples, +we must create two folders, one for the beamline filesystem and one for the core file system: + +```shell +ASAPO_HOST_DIR=/var/tmp/asapo # the folder used in step 1 +mkdir -p $ASAPO_HOST_DIR/global_shared/online_data/test/current/raw +mkdir -p $ASAPO_HOST_DIR/global_shared/data/test_facility/gpfs/test/2019/data/asapo_test +``` + +:::note ASAP::O in production mode + +We have a running instance for processing data collected during experiments. Please get in touch with the FS-SC group for more information. + +::: + +### Services shutdown + +After you're done with your instance of ASAPO, you might want to gracefully shut down the running services. If you don't, your machine will become cluttered with unused Docker images. + +```shell content="./versioned_examples/version-22.03.0/cleanup.sh" +``` + +<br/><br/> + +## Install client libraries {#step-2} + +Now you can install Python packages or C++ libraries for ASAPO Producer and Consumer API (you need to be on the DESY intranet to access the files). + +<Tabs + defaultValue="python-pip" + values={[ + { label: 'Python - pip', value: 'python-pip', }, + { label: 'Python - packages', value: 'python-packages', }, + { label: 'C++ packages', value: 'cpp', }, + ] +}> +<TabItem value="python-pip"> + +```shell content="./versioned_examples/version-22.03.0/install_python_clients_pip.sh" snippetTag="#snippet1" +``` + +</TabItem> +<TabItem value="python-packages"> + +```shell content="./versioned_examples/version-22.03.0/install_python_clients_pkg.sh" +``` + +</TabItem> +<TabItem value="cpp"> + +```shell content="./versioned_examples/version-22.03.0/install_cpp_clients.sh" +``` + +</TabItem> +</Tabs> + +## Code examples + +Please refer to the [Code Examples](cookbook/overview) section to see code snippets for various usage scenarios. + diff --git a/docs/site/versioned_docs/version-22.03.0/overview.md b/docs/site/versioned_docs/version-22.03.0/overview.md new file mode 100644 index 0000000000000000000000000000000000000000..7af1f0471cfe9deb243d8d5de447c76ebcf9b30a --- /dev/null +++ b/docs/site/versioned_docs/version-22.03.0/overview.md @@ -0,0 +1,40 @@ +--- +title: Overview +--- + + + +ASAP::O (or ASAPO) is a high performance distributed streaming platform. It is being developed at DESY and is mainly aimed at supporting online/offline analysis of experimental data produced at its facilities. The ideas behind it are quite similar to those of Apache Kafka and similar messaging solutions, but ASAPO is developed and tuned for scientific use cases with their specific workflows and where the size of the messages is much larger (MBs to GBs as compared to KBs in traditional systems). + + + +ASAPO has the following key capabilities: + +- Deliver data produced by an experimental facility (e.g. detector) to a data center in a high-performance, fault-tolerant way +- Consume this data in various modes (as soon as new data occurs, random access, latest available data, in parallel, ...) +- Ingest own data / create computational pipelines + + +ASAPO consists of the following three components: + +- Core services (run in the background on a single node or a cluster and provide ASAPO functionality) +- Producer API to ingest data into the system +- Consumer API to retrieve data from the system + +### Bird's eye view + +A workflow when using ASAPO can be represented as follows: + + + + +Usually, an end user can see ASAPO core services as a black box.
But some more details are given [here](core-architecture). + +Next, one can learn more about the following concepts: + +- [Data in ASAPO](data-in-asapo) +- [Producer clients](producer-clients) +- [Consumer clients](consumer-clients) + +You can also compare it with other solutions, jump directly to [Getting Started](getting-started.mdx) or have a look at the use cases section. + diff --git a/docs/site/versioned_docs/version-22.03.0/p02.1.md b/docs/site/versioned_docs/version-22.03.0/p02.1.md new file mode 100644 index 0000000000000000000000000000000000000000..271b2bf10a91d2ecf398e2a571b49b0df874bbe7 --- /dev/null +++ b/docs/site/versioned_docs/version-22.03.0/p02.1.md @@ -0,0 +1,43 @@ +--- +title: ASAP::O at P02.1 +--- + +Online analysis at P02.1 has two main goals: + +- Doing as much beamline-specific data analysis as possible for the user, so that they can concentrate on analyzing the experiment-specific details. This provides comprehensive support for the user from the beamline side and therefore leads to higher user satisfaction. Automation of the analysis is essential to achieve the necessary high throughput, which is mandatory for current and future diffraction applications. +- Enabling timely decisions through a "live" view of raw images and analyzed data. Problems with the measurement can often be more easily detected in the analyzed data, which should be made available to the user as early as possible to avoid wasting valuable measurement time on suboptimal experimental conditions. + +## Description of a typical beamtime at P02.1 + +- A beamtime consists of a number of scans +- Each scan consists of one or more steps +- At each step, an image is taken by the detectors, and several other scalar sensor values are gathered, e.g., temperature, electric current, position, etc. +- The parameters for the analysis are fixed during one scan but might need to change from one scan to the next + +## Analysis Pipeline + +- Images are taken by one or two detectors +- Optionally, a number of consecutive images of a single detector are merged into one averaged image to reduce the noise +- The (averaged) images are stored into one NeXus file per detector per scan +- Each (averaged) image is analyzed independently +- The analyzed data is written to one NeXus file per detector per scan +- All scalar sensor data and additional metadata is written to one NeXus file per scan that links to the other NeXus files with the (averaged) images and analyzed data +- A viewer displays the live and history output of all relevant processing steps + + + +## Use of ASAPO + +In the following, ASAPO-specific details for the pipeline of a single detector are given. For multiple detectors, all stream names are suffixed by the detector ID. + +1. The data acquisition software stores the parameters for the analysis in a "scan-metadata" stream with one substream per scan and one metadata entry per substream +2. Images are ingested into ASAPO +3. The images taken by the detectors are written to the beamline filesystem by HiDRA (one file per image) +4. HiDRA inserts the files into ASAPO. It assigns the files to the correct "detector" stream based on the file name. Each stream uses one substream per scan, whose name is also extracted from the filename by HiDRA. This applies to the index within a substream as well. +5. If enabled, one "averager" worker per detector stream reads the files from the "detector" stream and emits the averaged images into the "averaged" stream.
The name of the substream of the input is used for the name of the output substream. The indices within a substream are chosen consecutively. +6. One "nexus-writer" worker per detector reads the images either from the "detector" or the "averaged" stream. All images of a single substream are stored into one file. The filename is constructed from the name of the stream and substream the image belongs to. The index within a substream corresponds to the index within the HDF5 dataset. +7. Multiple "asapo-dawn" workers read their parameters from the "scan-metadata" stream at the start of each substream. The images are read from the "detector" or "averaged" stream. The workers emit the resulting data into an "analyzed" stream with the same substream name as the input and the same index. +8. One "nexus-write" worker per detector reads the analyzed data from the "analyzed" stream and writes it into one NeXus file per substream. The filename is constructed from the name of the stream and substream the data belongs to. The index within a substream corresponds to the index within the HDF5 dataset. +9. The data acquisition software stores all scalar data and all additional scan-metadata in a master NeXus file that links to the NeXus files produced by the ASAPO workers. +10. The viewer listens to all streams and parses the metadata to create a continuously updated tree view of all available data. Clicking on an item uses get_by_id to retrieve the actual data. A "live" mode automatically retrieves the latest data. + diff --git a/docs/site/versioned_docs/version-22.03.0/producer-clients.md b/docs/site/versioned_docs/version-22.03.0/producer-clients.md new file mode 100644 index 0000000000000000000000000000000000000000..89b421a9960e77bf8495e59b3595ac39e4fb5dea --- /dev/null +++ b/docs/site/versioned_docs/version-22.03.0/producer-clients.md @@ -0,0 +1,23 @@ +--- +title: Producer Clients +--- + +A producer client (or producer) is the part of a distributed streaming system that is responsible for creating data streams (i.e. ingesting data into the system). It is usually the user's (beamline scientist, detector developer, physicist, ...) responsibility to develop a client for a specific beamline, detector or experiment using the ASAPO Producer API, and ASAPO's responsibility to make sure data is transferred and saved in an efficient and reliable way. + + + +Producer API is available for C++ and Python and has the following main functionality (a condensed C++ sketch is shown after the list): + +- Create a producer instance and bind it to a specific beamtime and data source + - multiple instances can be created (also within a single application) to send data from different sources; an instance id and pipeline step id can be set to allow pipeline monitoring +- Send messages to a specific stream (you can read more [here](data-in-asapo) about data in ASAPO) + - each message must have a consecutive integer index, ASAPO does not create indexes automatically + - to compose datasets, a dataset substream (and dataset size) should be sent along with each message + - messages are sent asynchronously, in parallel using multiple threads + - retransfer will be attempted in case of system failure + - a callback function can be provided to react after data was sent or to process errors + +Please refer to [C++](http://asapo.desy.de/cpp/) and [Python](http://asapo.desy.de/python/) documentation for specific details (available from DESY intranet only).
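To make the list above concrete, here is a condensed C++ sketch of the producer workflow, put together from the `produce.cpp` example shipped with this release. The endpoint (`localhost:8400`), beamtime (`asapo_test`), data source (`test_source`), instance id (`test_producer_instance`) and pipeline step id (`pipeline_step_1`) are placeholder values copied from the examples, not a required configuration.

```cpp
#include "asapo/asapo_producer.h"

#include <cstdlib>
#include <cstring>
#include <iostream>
#include <string>

// callback: called asynchronously for every send request once it has been processed
void ProcessAfterSend(asapo::RequestCallbackPayload payload, asapo::Error err) {
    if (err) {
        std::cerr << "error/warning during send: " << err << std::endl;
    } else {
        std::cout << "successfully sent " << payload.original_header.Json() << std::endl;
    }
}

int main(int argc, char* argv[]) {
    asapo::Error err;

    // bind the producer to a beamtime and data source; the first two fields are the
    // instance id (can be "auto") and the pipeline step id used for pipeline monitoring
    auto credentials = asapo::SourceCredentials{"test_producer_instance", "pipeline_step_1",
                                                asapo::SourceType::kProcessed,
                                                "asapo_test", "", "test_source", ""};

    auto producer = asapo::Producer::Create("localhost:8400", 1,
                                            asapo::RequestHandlerType::kTcp,
                                            credentials, 60000, &err);
    if (err) { std::cerr << "cannot create producer: " << err << std::endl; return EXIT_FAILURE; }

    // messages carry consecutive integer indices chosen by the client (here 1 and 2)
    for (uint64_t i = 1; i <= 2; i++) {
        std::string to_send = "message #" + std::to_string(i);
        auto send_size = to_send.size() + 1;
        auto buffer = asapo::MessageData(new uint8_t[send_size]);
        memcpy(buffer.get(), to_send.c_str(), send_size);

        asapo::MessageHeader message_header{i, send_size, "processed/test_file_" + std::to_string(i)};
        err = producer->Send(message_header, std::move(buffer), asapo::kDefaultIngestMode,
                             "default", &ProcessAfterSend);  // queued and sent by worker threads
        if (err) { std::cerr << "cannot queue message: " << err << std::endl; return EXIT_FAILURE; }
    }

    // wait until all queued requests have been processed, then mark the stream as finished
    err = producer->WaitRequestsFinished(2000);
    if (err) { std::cerr << "producer exit on timeout: " << err << std::endl; return EXIT_FAILURE; }

    err = producer->SendStreamFinishedFlag("default", 2, "", &ProcessAfterSend);
    if (err) { std::cerr << "cannot finish stream: " << err << std::endl; return EXIT_FAILURE; }
    err = producer->WaitRequestsFinished(2000);

    return EXIT_SUCCESS;
}
```

Note that `Send` only queues a request for the sending threads; the callback reports the per-message result asynchronously, and `WaitRequestsFinished` blocks until all queued requests have been processed.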
+ + + diff --git a/docs/site/versioned_examples/version-21.06.0/python/produce.py b/docs/site/versioned_examples/version-21.06.0/python/produce.py index 2a645631e525d84246507b80c4cf2cefa71b2427..a37c168db1b9ea9b5c18b5a7ca12bea83544ee74 100644 --- a/docs/site/versioned_examples/version-21.06.0/python/produce.py +++ b/docs/site/versioned_examples/version-21.06.0/python/produce.py @@ -19,7 +19,7 @@ producer = asapo_producer.create_producer(endpoint, beamtime, # the folder should exist 'auto', # can be 'auto', if beamtime_id is given 'test_source', # source - '', # athorization token + '', # authorization token 1, # number of threads. Increase, if the sending speed seems slow 60000) # timeout. Do not change. diff --git a/docs/site/versioned_examples/version-21.09.0/cpp/consume.cpp b/docs/site/versioned_examples/version-21.09.0/cpp/consume.cpp index aa55634693e10446dbe03f6313cec75215836272..6bbcd3e79a6418bb11d5887e5ed93cb3a01ac3fb 100644 --- a/docs/site/versioned_examples/version-21.09.0/cpp/consume.cpp +++ b/docs/site/versioned_examples/version-21.09.0/cpp/consume.cpp @@ -31,7 +31,7 @@ int main(int argc, char* argv[]) { beamtime, // the folder should exist "", // can be empty or "auto", if beamtime_id is given "test_source", // source - token // athorization token + token // authorization token }; auto consumer = asapo::ConsumerFactory::CreateConsumer diff --git a/docs/site/versioned_examples/version-21.09.0/cpp/produce.cpp b/docs/site/versioned_examples/version-21.09.0/cpp/produce.cpp index aee2a9b74db8f5ae583b331f593032cceafc3c68..bd5b9c3985cb53099c7bbead762ec1e597deabf6 100644 --- a/docs/site/versioned_examples/version-21.09.0/cpp/produce.cpp +++ b/docs/site/versioned_examples/version-21.09.0/cpp/produce.cpp @@ -34,7 +34,7 @@ int main(int argc, char* argv[]) { beamtime, // the folder should exist "", // can be empty or "auto", if beamtime_id is given "test_source", // source - "" // athorization token + "" // authorization token }; auto producer = asapo::Producer::Create(endpoint, diff --git a/docs/site/versioned_examples/version-21.09.0/python/produce.py b/docs/site/versioned_examples/version-21.09.0/python/produce.py index 4d03ccb9a0009ebed824396037e521d91686b9f3..50035ea72cfc4872cbe7c8120fd0fb788684cd98 100644 --- a/docs/site/versioned_examples/version-21.09.0/python/produce.py +++ b/docs/site/versioned_examples/version-21.09.0/python/produce.py @@ -20,7 +20,7 @@ producer = asapo_producer \ beamtime, # the folder should exist 'auto', # can be 'auto', if beamtime_id is given 'test_source', # source - '', # athorization token + '', # authorization token 1, # number of threads. Increase, if the sending speed seems slow 60000) # timeout. Do not change. 
diff --git a/docs/site/versioned_examples/version-21.12.0/cpp/consume.cpp b/docs/site/versioned_examples/version-21.12.0/cpp/consume.cpp index f13db95c0520395cd781660459a4557374d1460b..30f681c972a8e0f16b81924c8085e4f1b15681f5 100644 --- a/docs/site/versioned_examples/version-21.12.0/cpp/consume.cpp +++ b/docs/site/versioned_examples/version-21.12.0/cpp/consume.cpp @@ -33,7 +33,7 @@ int main(int argc, char* argv[]) { beamtime, // the folder should exist "", // can be empty or "auto", if beamtime_id is given "test_source", // source - token // athorization token + token // authorization token }; auto consumer = asapo::ConsumerFactory::CreateConsumer diff --git a/docs/site/versioned_examples/version-21.12.0/cpp/produce.cpp b/docs/site/versioned_examples/version-21.12.0/cpp/produce.cpp index 157d53d060d731827ec42c751f54f7aa310f268f..459704400a8455258cba9810e40f0ad6a3a6e37c 100644 --- a/docs/site/versioned_examples/version-21.12.0/cpp/produce.cpp +++ b/docs/site/versioned_examples/version-21.12.0/cpp/produce.cpp @@ -38,7 +38,7 @@ int main(int argc, char* argv[]) { beamtime, // the folder should exist "", // can be empty or "auto", if beamtime_id is given "test_source", // source - "" // athorization token + "" // authorization token }; auto producer = asapo::Producer::Create(endpoint, diff --git a/docs/site/versioned_examples/version-21.12.0/python/produce.py b/docs/site/versioned_examples/version-21.12.0/python/produce.py index 262015b25fd99be947f1756222d6a74a1bb54acb..5a63ed85ab3c4b3e8ee39ef215a18f7136526d6e 100644 --- a/docs/site/versioned_examples/version-21.12.0/python/produce.py +++ b/docs/site/versioned_examples/version-21.12.0/python/produce.py @@ -23,7 +23,7 @@ producer = asapo_producer \ beamtime, # the folder should exist 'auto', # can be 'auto', if beamtime_id is given 'test_source', # source - '', # athorization token + '', # authorization token 1, # number of threads. Increase, if the sending speed seems slow 60000) # timeout. Do not change. 
diff --git a/docs/site/versioned_examples/version-22.03.0/c/Makefile b/docs/site/versioned_examples/version-22.03.0/c/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..ba3d4a872835ae3d20e6dfe43452a6ede2771332 --- /dev/null +++ b/docs/site/versioned_examples/version-22.03.0/c/Makefile @@ -0,0 +1,29 @@ +PROGRAM=asapo-consume + +LDFLAGS = "-Wl,-rpath,/opt/asapo/lib" +CFLAGS += `PKG_CONFIG_PATH=/opt/asapo/lib/pkgconfig pkg-config --cflags libasapo-consumer` +LIBS = `PKG_CONFIG_PATH=/opt/asapo/lib/pkgconfig pkg-config --libs libasapo-consumer` + +# for default installation +#LDFLAGS = +#CFLAGS += `pkg-config --cflags libasapo-consumer` +#LIBS = `pkg-config --libs libasapo-consumer` + +RM=rm -f + +SRCS=consume.c +OBJS=$(subst .c,.o,$(SRCS)) + +all: $(PROGRAM) + +$(PROGRAM): $(OBJS) + $(CC) $(LDFLAGS) -o $@ $^ $(LIBS) + +%.o: %.cpp + $(CC) $(CFLAGS) $(INCLUDE) -c -o $@ $< + +clean: + $(RM) $(OBJS) + +distclean: clean + $(RM) $(PROGRAM) diff --git a/docs/site/versioned_examples/version-22.03.0/c/consume.c b/docs/site/versioned_examples/version-22.03.0/c/consume.c new file mode 100644 index 0000000000000000000000000000000000000000..e6cfb0957044aecd7ec21b297d808898650af18b --- /dev/null +++ b/docs/site/versioned_examples/version-22.03.0/c/consume.c @@ -0,0 +1,66 @@ +#include "asapo/consumer_c.h" + +#include <string.h> +#include <stdio.h> +#include <stdlib.h> + +void exit_if_error(const char *error_string, const AsapoErrorHandle err) { + if (asapo_is_error(err)) { + char buf[1024]; + asapo_error_explain(err, buf, sizeof(buf)); + printf("%s %s\n", error_string, buf); + exit(EXIT_FAILURE); + } +} + +int main(int argc, char* argv[]) { + AsapoErrorHandle err = asapo_new_handle(); + AsapoMessageMetaHandle mm = asapo_new_handle(); + AsapoMessageDataHandle data = asapo_new_handle(); + + /* create snippet_start */ + const char *endpoint = "localhost:8400"; + const char *beamtime = "asapo_test"; + const char *token = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJleHAiOjk1NzE3MTAyMTYsImp0aSI6ImMzaXFhbGpmNDNhbGZwOHJua20wIiwic3ViIjoiYnRfYXNhcG9fdGVzdCIsIkV4dHJhQ2xhaW1zIjp7IkFjY2Vzc1R5cGVzIjpbIndyaXRlIiwicmVhZCJdfX0.dkWupPO-ysI4t-jtWiaElAzDyJF6T7hu_Wz_Au54mYU"; + + const char * path_to_files = "/var/tmp/asapo/global_shared/data/test_facility/gpfs/test/2019/data/asapo_test"; //set it according to your configuration. 
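    /* "test_consumer_instance" and "pipeline_step_1" below are the consumer instance id (can be "auto")
       and the pipeline step id; these two parameters are new in 22.03.0 and allow pipeline monitoring */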
+ + AsapoSourceCredentialsHandle cred = asapo_create_source_credentials(kProcessed, + "test_consumer_instance", "pipeline_step_1", beamtime, + "", "test_source", token); + AsapoConsumerHandle consumer = asapo_create_consumer(endpoint, + path_to_files, 1, + cred, + &err); + asapo_free_handle(&cred); + /* create snippet_end */ + + exit_if_error("Cannot create consumer", err); + asapo_consumer_set_timeout(consumer, 5000ull); + + /* consume snippet_start */ + AsapoStringHandle group_id = asapo_consumer_generate_new_group_id(consumer, &err); + exit_if_error("Cannot create group id", err); + + asapo_consumer_get_next(consumer, group_id, &mm, &data, "default",&err); + exit_if_error("Cannot get next record", err); + + printf("id: %llu\n", (unsigned long long)asapo_message_meta_get_id(mm)); + printf("file name: %s\n", asapo_message_meta_get_name(mm)); + printf("file content: %s\n", asapo_message_data_get_as_chars(data)); + /* consume snippet_end */ + + /* delete snippet_start */ + asapo_consumer_delete_stream(consumer,"default", 1,1,&err); + exit_if_error("Cannot delete stream", err); + printf("stream deleted\n"); + /* delete snippet_end */ + + asapo_free_handle(&err); + asapo_free_handle(&mm); + asapo_free_handle(&data); + asapo_free_handle(&consumer); + asapo_free_handle(&group_id); + + return EXIT_SUCCESS; +} diff --git a/docs/site/versioned_examples/version-22.03.0/cleanup.sh b/docs/site/versioned_examples/version-22.03.0/cleanup.sh new file mode 100644 index 0000000000000000000000000000000000000000..7344a690f3905218aa423a7f6feec4b7b0e0e394 --- /dev/null +++ b/docs/site/versioned_examples/version-22.03.0/cleanup.sh @@ -0,0 +1,5 @@ +ASAPO_HOST_DIR=/var/tmp/asapo # you can change this if needed + +docker exec asapo jobs-stop +docker stop asapo +rm -rf $ASAPO_HOST_DIR diff --git a/docs/site/versioned_examples/version-22.03.0/cpp/CMakeLists.txt b/docs/site/versioned_examples/version-22.03.0/cpp/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..1d7e53c78e14292c2b9dc2e5dadf91100021a969 --- /dev/null +++ b/docs/site/versioned_examples/version-22.03.0/cpp/CMakeLists.txt @@ -0,0 +1,31 @@ +cmake_minimum_required(VERSION 3.3) + +project(asapo-client) + +set(CMAKE_CXX_STANDARD 11) + +# optionally use some other curl lib (e.g. 
static) +# set (CURL_LIBRARY /usr/local/lib/libasapo-curl.a) +# optionally linh gcc and stdc++ statically +# set (ASAPO_STATIC_CXX_LIBS ON) +# optionally link asapo as shared libs (ASAPO_STATIC_CXX_LIBS not used then) +# set (ASAPO_SHARED_LIBS ON) + +#consumer snippet_start_remove +find_package (Asapo REQUIRED COMPONENTS Producer) + +set(TARGET_NAME asapo-produce) +set(SOURCE_FILES produce.cpp) + +add_executable(${TARGET_NAME} ${SOURCE_FILES}) +target_link_libraries(${TARGET_NAME} imported::asapo-producer) +#consumer snippet_end_remove +#producer snippet_start_remove +find_package (Asapo REQUIRED COMPONENTS Consumer) + +set(TARGET_NAME asapo-consume) +set(SOURCE_FILES consume.cpp) + +add_executable(${TARGET_NAME} ${SOURCE_FILES}) +target_link_libraries(${TARGET_NAME} imported::asapo-consumer) +#producer snippet_end_remove \ No newline at end of file diff --git a/docs/site/versioned_examples/version-22.03.0/cpp/acknowledgements.cpp b/docs/site/versioned_examples/version-22.03.0/cpp/acknowledgements.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d8992ea60452fff6c5f15422cda94e658099ac1d --- /dev/null +++ b/docs/site/versioned_examples/version-22.03.0/cpp/acknowledgements.cpp @@ -0,0 +1,132 @@ +#include "asapo/asapo_producer.h" +#include "asapo/asapo_consumer.h" +#include <iostream> +#include <set> + +void ProcessAfterSend(asapo::RequestCallbackPayload payload, asapo::Error err) { + if (err && err != asapo::ProducerErrorTemplates::kServerWarning) { + std::cerr << "error during send: " << err << std::endl; + return; + } else if (err) { + std::cout << "warning during send: " << err << std::endl; + } else { + std::cout << "successfuly send " << payload.original_header.Json() << std::endl; + return; + } +} + +void exit_if_error(std::string error_string, const asapo::Error& err) { + if (err) { + std::cerr << error_string << err << std::endl; + exit(EXIT_FAILURE); + } +} + +int main(int argc, char* argv[]) { + asapo::Error err; + + auto endpoint = "localhost:8400"; + auto beamtime = "asapo_test"; + + auto token = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJl" + "eHAiOjk1NzE3MTAyMTYsImp0aSI6ImMzaXFhbGpmN" + "DNhbGZwOHJua20wIiwic3ViIjoiYnRfYXNhcG9fdG" + "VzdCIsIkV4dHJhQ2xhaW1zIjp7IkFjY2Vzc1R5cGV" + "zIjpbIndyaXRlIiwicmVhZCJdfX0.dkWupPO-ysI4" + "t-jtWiaElAzDyJF6T7hu_Wz_Au54mYU"; + + auto path_to_files = "/var/tmp/asapo/global_shared/data/test_facility/gpfs/test/2019/data/asapo_test"; + + auto credentials = asapo::SourceCredentials{asapo::SourceType::kProcessed, beamtime, "", "test_source", token}; + + auto producer = asapo::Producer::Create(endpoint, 1, asapo::RequestHandlerType::kTcp, credentials, 60000, &err); + exit_if_error("Cannot start producer", err); + + producer->SetLogLevel(asapo::LogLevel::Error); + + err = producer->DeleteStream("default", 1000, asapo::DeleteStreamOptions{true, true}); + exit_if_error("Cannot delete stream", err); + + // let's start with producing a sample of 10 simple messages + for (uint64_t i = 1; i <= 10; i++) { + std::string to_send = "message#" + std::to_string(i); + auto send_size = to_send.size() + 1; + auto buffer = asapo::MessageData(new uint8_t[send_size]); + memcpy(buffer.get(), to_send.c_str(), send_size); + + asapo::MessageHeader message_header{i, send_size, "processed/test_file_" + std::to_string(i)}; + err = producer->Send(message_header, std::move(buffer), asapo::kDefaultIngestMode, "default", &ProcessAfterSend); + exit_if_error("Cannot send message", err); + } + + err = producer->WaitRequestsFinished(2000); + exit_if_error("Producer 
exit on timeout", err); + + auto consumer = asapo::ConsumerFactory::CreateConsumer(endpoint, path_to_files, true, credentials, &err); + exit_if_error("Cannot start consumer", err); + consumer->SetTimeout(5000); + auto group_id = consumer->GenerateNewGroupId(&err); + exit_if_error("Cannot create group id", err); + + // consume snippet_start + asapo::MessageMeta mm; + asapo::MessageData data; + + const std::set<int> ids {3, 5, 7}; + + // the flag to separate the first attempt for message #3 + bool firstTryNegative = true; + + do { + err = consumer->GetNext(group_id, &mm, &data, "default"); + + if (err && err == asapo::ConsumerErrorTemplates::kStreamFinished) { + std::cout << "stream finished" << std::endl; + break; + } + + if (err && err == asapo::ConsumerErrorTemplates::kEndOfStream) { + std::cout << "stream ended" << std::endl; + break; + } + exit_if_error("Cannot get next record", err); // snippet_end_remove + + // acknowledge all the messages except the ones in the set + if (ids.find(mm.id) == ids.end()) { + std::cout << "Acknowledge the message #" << mm.id << std::endl; + consumer->Acknowledge(group_id, mm.id, "default"); + } + + // for message #3 we issue a negative acknowledgement, which will put it at the next place in the stream + // in this case, it will be put in the end of a stream + if (mm.id == 3) { + if (firstTryNegative) { + std::cout << "Negative acknowledgement of the message #" << mm.id << std::endl; + // make the acknowledgement with a delay of 1 second + consumer->NegativeAcknowledge(group_id, mm.id, 2000, "default"); + firstTryNegative = false; + } else { + // on our second attempt we acknowledge the message + std::cout << "Second try of the message #" << mm.id << std::endl; + consumer->Acknowledge(group_id, mm.id, "default"); + } + } + } while (1); + // consume snippet_end + + // print snippet_start + auto unacknowledgedMessages = consumer->GetUnacknowledgedMessages(group_id, 0, 0, "default", &err); + exit_if_error("Could not get list of messages", err); // snippet_end_remove + + for (int i = 0; i < unacknowledgedMessages.size(); i++) { + err = consumer->GetById(unacknowledgedMessages[i], &mm, &data, "default"); + exit_if_error("Cannot get message", err); // snippet_end_remove + + std::cout << "Unacknowledged message: " << reinterpret_cast<char const*>(data.get()) << std::endl; + std::cout << "id: " << mm.id << std::endl; + std::cout << "file name: " << mm.name << std::endl; + } + // print snippet_end + + return EXIT_SUCCESS; +} diff --git a/docs/site/versioned_examples/version-22.03.0/cpp/consume.cpp b/docs/site/versioned_examples/version-22.03.0/cpp/consume.cpp new file mode 100644 index 0000000000000000000000000000000000000000..41b48881e9b2d831756c7788d83d2a6f16c42314 --- /dev/null +++ b/docs/site/versioned_examples/version-22.03.0/cpp/consume.cpp @@ -0,0 +1,100 @@ +#include "asapo/asapo_consumer.h" +#include <iostream> + + +void exit_if_error(std::string error_string, const asapo::Error& err) { + if (err) { + std::cerr << error_string << std::endl << err << std::endl; + exit(EXIT_FAILURE); + } +} + +int main(int argc, char* argv[]) { + asapo::Error err; + +// create snippet_start + auto endpoint = "localhost:8400"; + auto beamtime = "asapo_test"; + + // test token. 
In production it is created during the start of the beamtime + auto token = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJl" + "eHAiOjk1NzE3MTAyMTYsImp0aSI6ImMzaXFhbGpmN" + "DNhbGZwOHJua20wIiwic3ViIjoiYnRfYXNhcG9fdG" + "VzdCIsIkV4dHJhQ2xhaW1zIjp7IkFjY2Vzc1R5cGV" + "zIjpbIndyaXRlIiwicmVhZCJdfX0.dkWupPO-ysI4" + "t-jtWiaElAzDyJF6T7hu_Wz_Au54mYU"; + + // set it according to your configuration. + auto path_to_files = "/var/tmp/asapo/global_shared/data/test_facility/gpfs/test/2019/data/asapo_test"; + + auto credentials = asapo::SourceCredentials + { + "test_consumer_instance", // # conumser instance id (can be "auto") + "pipeline_step_1", // # pipeline step id + asapo::SourceType::kProcessed, // should be kProcessed or kRaw, kProcessed writes to the core FS + beamtime, // the folder should exist + "", // can be empty or "auto", if beamtime_id is given + "test_source", // source + token // authorization token + }; + + auto consumer = asapo::ConsumerFactory::CreateConsumer + (endpoint, + path_to_files, + true, // True if the path_to_files is accessible locally, False otherwise + credentials, // same as for producer + &err); +// create snippet_end + exit_if_error("Cannot create consumer", err); + consumer->SetTimeout(5000); // How long do you want to wait on non-finished stream for a message. + +// list snippet_start + for (const auto& stream : consumer->GetStreamList("", asapo::StreamFilter::kAllStreams, &err)) + { + std::cout << "Stream name: " << stream.name << std::endl; + std::cout << "LastId: " << stream.last_id << std::endl; + std::cout << "Stream finished: " << stream.finished << std::endl; + std::cout << "Next stream: " << stream.next_stream << std::endl; + } +// list snippet_end + +// consume snippet_start + // Several consumers can use the same group_id to process messages in parallel + auto group_id = consumer->GenerateNewGroupId(&err); + exit_if_error("Cannot create group id", err); // snippet_end_remove + + asapo::MessageMeta mm; + asapo::MessageData data; + + do { + // GetNext is the main function to get messages from streams. You would normally call it in loop. 
+ // you can either manually compare the mm.id to the stream.last_id, or wait for the error to happen + err = consumer->GetNext(group_id, &mm, &data, "default"); + + if (err && err == asapo::ConsumerErrorTemplates::kStreamFinished) { + // all the messages in the stream were processed + std::cout << "stream finished" << std::endl; + break; + } + if (err && err == asapo::ConsumerErrorTemplates::kEndOfStream) { + // not-finished stream timeout, or wrong or empty stream + std::cout << "stream ended" << std::endl; + break; + } + exit_if_error("Cannot get next record", err); // snippet_end_remove + + std::cout << "id: " << mm.id << std::endl; + std::cout << "file name: " << mm.name << std::endl; + std::cout << "message content: " << reinterpret_cast<char const*>(data.get()) << std::endl; + } while (1); +// consume snippet_end + +// delete snippet_start + // you can delete the stream after consuming + err = consumer->DeleteStream("default", asapo::DeleteStreamOptions{true, true}); + exit_if_error("Cannot delete stream", err); // snippet_end_remove + std::cout << "stream deleted" << std::endl; +// delete snippet_end + + return EXIT_SUCCESS; +} diff --git a/docs/site/versioned_examples/version-22.03.0/cpp/consume_dataset.cpp b/docs/site/versioned_examples/version-22.03.0/cpp/consume_dataset.cpp new file mode 100644 index 0000000000000000000000000000000000000000..8b11aebedd40f84ff2a80b5b55df110e6858f068 --- /dev/null +++ b/docs/site/versioned_examples/version-22.03.0/cpp/consume_dataset.cpp @@ -0,0 +1,71 @@ +#include "asapo/asapo_consumer.h" +#include <iostream> + + +void exit_if_error(std::string error_string, const asapo::Error& err) { + if (err) { + std::cerr << error_string << std::endl << err << std::endl; + exit(EXIT_FAILURE); + } +} + +int main(int argc, char* argv[]) { + asapo::Error err; + + auto endpoint = "localhost:8400"; + auto beamtime = "asapo_test"; + + auto token = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJl" + "eHAiOjk1NzE3MTAyMTYsImp0aSI6ImMzaXFhbGpmN" + "DNhbGZwOHJua20wIiwic3ViIjoiYnRfYXNhcG9fdG" + "VzdCIsIkV4dHJhQ2xhaW1zIjp7IkFjY2Vzc1R5cGV" + "zIjpbIndyaXRlIiwicmVhZCJdfX0.dkWupPO-ysI4" + "t-jtWiaElAzDyJF6T7hu_Wz_Au54mYU"; + + auto path_to_files = "/var/tmp/asapo/global_shared/data/test_facility/gpfs/test/2019/data/asapo_test"; + + auto credentials = asapo::SourceCredentials{asapo::SourceType::kProcessed, beamtime, "", "test_source", token}; + + auto consumer = asapo::ConsumerFactory::CreateConsumer(endpoint, path_to_files, true, credentials, &err); + exit_if_error("Cannot create consumer", err); + consumer->SetTimeout((uint64_t) 5000); + + auto group_id = consumer->GenerateNewGroupId(&err); + exit_if_error("Cannot create group id", err); + + // dataset snippet_start + asapo::DataSet ds; + asapo::MessageData data; + + do { + ds = consumer->GetNextDataset(group_id, 0, "default", &err); + + if (err && err == asapo::ConsumerErrorTemplates::kStreamFinished) { + std::cout << "stream finished" << std::endl; + break; + } + + if (err && err == asapo::ConsumerErrorTemplates::kEndOfStream) { + std::cout << "stream ended" << std::endl; + break; + } + exit_if_error("Cannot get next record", err); // snippet_end_remove + + std::cout << "Dataset Id: " << ds.id << std::endl; + + for(int i = 0; i < ds.content.size(); i++) + { + err = consumer->RetrieveData(&ds.content[i], &data); + exit_if_error("Cannot get dataset content", err); // snippet_end_remove + + std::cout << "Part " << ds.content[i].dataset_substream << " out of " << ds.expected_size << std:: endl; + std::cout << "message content: " << 
reinterpret_cast<char const*>(data.get()) << std::endl; + } + } while (1); + // dataset snippet_end + + err = consumer->DeleteStream("default", asapo::DeleteStreamOptions{true, true}); + exit_if_error("Cannot delete stream", err); + + return EXIT_SUCCESS; +} diff --git a/docs/site/versioned_examples/version-22.03.0/cpp/metadata.cpp b/docs/site/versioned_examples/version-22.03.0/cpp/metadata.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f63eb1d420f4d89705041e22823a1869e59d48ce --- /dev/null +++ b/docs/site/versioned_examples/version-22.03.0/cpp/metadata.cpp @@ -0,0 +1,163 @@ +#include "asapo/asapo_producer.h" +#include "asapo/asapo_consumer.h" +#include <iostream> + +void ProcessAfterSend(asapo::RequestCallbackPayload payload, asapo::Error err) { + if (err && err != asapo::ProducerErrorTemplates::kServerWarning) { + std::cerr << "error during send: " << err << std::endl; + return; + } else if (err) { + std::cout << "warning during send: " << err << std::endl; + } else { + std::cout << "successfuly send " << payload.original_header.Json() << std::endl; + return; + } +} + +void exit_if_error(std::string error_string, const asapo::Error& err) { + if (err) { + std::cerr << error_string << err << std::endl; + exit(EXIT_FAILURE); + } +} + +int main(int argc, char* argv[]) { + asapo::Error err; + + auto endpoint = "localhost:8400"; + auto beamtime = "asapo_test"; + + auto token = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJl" + "eHAiOjk1NzE3MTAyMTYsImp0aSI6ImMzaXFhbGpmN" + "DNhbGZwOHJua20wIiwic3ViIjoiYnRfYXNhcG9fdG" + "VzdCIsIkV4dHJhQ2xhaW1zIjp7IkFjY2Vzc1R5cGV" + "zIjpbIndyaXRlIiwicmVhZCJdfX0.dkWupPO-ysI4" + "t-jtWiaElAzDyJF6T7hu_Wz_Au54mYU"; + + auto path_to_files = "/var/tmp/asapo/global_shared/data/test_facility/gpfs/test/2019/data/asapo_test"; + + auto credentials = asapo::SourceCredentials{asapo::SourceType::kProcessed, beamtime, "", "test_source", token}; + + auto producer = asapo::Producer::Create(endpoint, 1, asapo::RequestHandlerType::kTcp, credentials, 60000, &err); + exit_if_error("Cannot start producer", err); + producer->SetLogLevel(asapo::LogLevel::Error); + + // beamtime_set snippet_start + // sample beamtime metadata. 
You can add any data you want, with any level of complexity + // in this example we use strings and ints, and one nested structure + auto beamtime_metadata = "{" + " \"name\": \"beamtime name\"," + " \"condition\": \"beamtime condition\"," + " \"intvalue1\": 5," + " \"intvalue2\": 10," + " \"structure\": {" + " \"structint1\": 20," + " \"structint2\": 30" + " }" + "}"; + + // send the metadata + // with this call the new metadata will completely replace the one that's already there + err = producer->SendBeamtimeMetadata(beamtime_metadata, asapo::MetaIngestMode{asapo::MetaIngestOp::kReplace, true}, &ProcessAfterSend); + // beamtime_set snippet_end + exit_if_error("Cannot send metadata", err); + + // beamtime_update snippet_start + // we can update the existing metadata if we want, by modifying the existing fields, or adding new ones + auto beamtime_metadata_update = "{" + " \"condition\": \"updated beamtime condition\"," + " \"newintvalue\": 15" + "}"; + + // send the metadata in the 'kUpdate' mode + err = producer->SendBeamtimeMetadata(beamtime_metadata_update, asapo::MetaIngestMode{asapo::MetaIngestOp::kUpdate, true}, &ProcessAfterSend); + // beamtime_update snippet_end + exit_if_error("Cannot send metadata", err); + + // stream_set snippet_start + // sample stream metadata + auto stream_metadata = "{" + " \"name\": \"stream name\"," + " \"condition\": \"stream condition\"," + " \"intvalue\": 44" + "}"; + + // works the same way: for the initial set we use 'kReplace' the stream metadata, but update is also possible + // update works exactly the same as for beamtime, but here we will only do 'kReplace' + err = producer->SendStreamMetadata(stream_metadata, asapo::MetaIngestMode{asapo::MetaIngestOp::kUpdate, true}, "default", &ProcessAfterSend); + // stream_set snippet_end + exit_if_error("Cannot send metadata", err); + + // message_set snippet_start + // sample message metadata + auto message_metadata = "{" + " \"name\": \"message name\"," + " \"condition\": \"message condition\"," + " \"somevalue\": 55" + "}"; + + std::string data_string = "hello"; + auto send_size = data_string.size() + 1; + auto buffer = asapo::MessageData(new uint8_t[send_size]); + memcpy(buffer.get(), data_string.c_str(), send_size); + + // the message metadata is sent together with the message itself + // in case of datasets each part has its own metadata + asapo::MessageHeader message_header{1, send_size, "processed/test_file", message_metadata}; + err = producer->Send(message_header, std::move(buffer), asapo::kDefaultIngestMode, "default", &ProcessAfterSend); + // message_set snippet_end + exit_if_error("Cannot send message", err); + + err = producer->WaitRequestsFinished(2000); + exit_if_error("Producer exit on timeout", err); + + auto consumer = asapo::ConsumerFactory::CreateConsumer(endpoint, path_to_files, true, credentials, &err); + exit_if_error("Cannot start consumer", err); + + // beamtime_get snippet_start + // read the beamtime metadata + auto beamtime_metadata_read = consumer->GetBeamtimeMeta(&err); + exit_if_error("Cannot get metadata", err); // snippet_end_remove + + std::cout << "Updated beamtime metadata:" << std::endl << beamtime_metadata_read << std::endl; + // beamtime_get snippet_end + + // stream_get snippet_start + // read the stream metadata + auto stream_metadata_read = consumer->GetStreamMeta("default", &err); + exit_if_error("Cannot get metadata", err); + + std::cout << "Stream metadata:" << std::endl << stream_metadata_read << std::endl; + // stream_get snippet_end + + auto group_id = 
consumer->GenerateNewGroupId(&err); + exit_if_error("Cannot create group id", err); + + asapo::MessageMeta mm; + asapo::MessageData data; + + do { + // message_get snippet_start + err = consumer->GetNext(group_id, &mm, &data, "default"); + + // message_get snippet_start_remove + if (err && err == asapo::ConsumerErrorTemplates::kStreamFinished) { + std::cout << "stream finished" << std::endl; + break; + } + + if (err && err == asapo::ConsumerErrorTemplates::kEndOfStream) { + std::cout << "stream ended" << std::endl; + break; + } + exit_if_error("Cannot get next record", err); + // message_get snippet_end_remove + + std::cout << "Message #" << mm.id << std::endl; + // our custom metadata is stored inside the message metadata + std::cout << "Message metadata:" << std::endl << mm.metadata << std::endl; + // message_get snippet_end + } while (1); + + return EXIT_SUCCESS; +} diff --git a/docs/site/versioned_examples/version-22.03.0/cpp/next_stream.cpp b/docs/site/versioned_examples/version-22.03.0/cpp/next_stream.cpp new file mode 100644 index 0000000000000000000000000000000000000000..41ecf9f5f68c0b32841efae24e85ee9cb06ff578 --- /dev/null +++ b/docs/site/versioned_examples/version-22.03.0/cpp/next_stream.cpp @@ -0,0 +1,127 @@ +#include "asapo/asapo_producer.h" +#include "asapo/asapo_consumer.h" +#include <iostream> + +void ProcessAfterSend(asapo::RequestCallbackPayload payload, asapo::Error err) { + if (err && err != asapo::ProducerErrorTemplates::kServerWarning) { + std::cerr << "error during send: " << err << std::endl; + return; + } else if (err) { + std::cout << "warning during send: " << err << std::endl; + } else { + std::cout << "successfuly send " << payload.original_header.Json() << std::endl; + return; + } +} + +void exit_if_error(std::string error_string, const asapo::Error& err) { + if (err) { + std::cerr << error_string << err << std::endl; + exit(EXIT_FAILURE); + } +} + +int main(int argc, char* argv[]) { + asapo::Error err; + + auto endpoint = "localhost:8400"; + auto beamtime = "asapo_test"; + + auto token = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJl" + "eHAiOjk1NzE3MTAyMTYsImp0aSI6ImMzaXFhbGpmN" + "DNhbGZwOHJua20wIiwic3ViIjoiYnRfYXNhcG9fdG" + "VzdCIsIkV4dHJhQ2xhaW1zIjp7IkFjY2Vzc1R5cGV" + "zIjpbIndyaXRlIiwicmVhZCJdfX0.dkWupPO-ysI4" + "t-jtWiaElAzDyJF6T7hu_Wz_Au54mYU"; + + auto path_to_files = "/var/tmp/asapo/global_shared/data/test_facility/gpfs/test/2019/data/asapo_test"; + + auto credentials = asapo::SourceCredentials{asapo::SourceType::kProcessed, beamtime, "", "test_source", token}; + + auto producer = asapo::Producer::Create(endpoint, 1, asapo::RequestHandlerType::kTcp, credentials, 60000, &err); + exit_if_error("Cannot start producer", err); + + producer->SetLogLevel(asapo::LogLevel::Error); + + // let's start with producing a sample of 10 simple messages + for (uint64_t i = 1; i <= 10; i++) { + std::string to_send = "content of the message #" + std::to_string(i); + auto send_size = to_send.size() + 1; + auto buffer = asapo::MessageData(new uint8_t[send_size]); + memcpy(buffer.get(), to_send.c_str(), send_size); + + asapo::MessageHeader message_header{i, send_size, "processed/test_file_" + std::to_string(i)}; + err = producer->Send(message_header, std::move(buffer), asapo::kDefaultIngestMode, "default", &ProcessAfterSend); + exit_if_error("Cannot send message", err); + } + + // next_stream_set snippet_start + // finish the stream and set the next stream to be called 'next' + producer->SendStreamFinishedFlag("default", 10, "next", &ProcessAfterSend); + // next_stream_set 
snippet_end + + // populate the 'next' stream as well + for (uint64_t i = 1; i <= 5; i++) { + std::string to_send = "content of the message #" + std::to_string(i); + auto send_size = to_send.size() + 1; + auto buffer = asapo::MessageData(new uint8_t[send_size]); + memcpy(buffer.get(), to_send.c_str(), send_size); + + asapo::MessageHeader message_header{i, send_size, "processed/test_file_next_" + std::to_string(i)}; + err = producer->Send(message_header, std::move(buffer), asapo::kDefaultIngestMode, "next", &ProcessAfterSend); + exit_if_error("Cannot send message", err); + } + + // we leave the 'next' stream unfinished, but the chain of streams can be of any length + + err = producer->WaitRequestsFinished(2000); + exit_if_error("Producer exit on timeout", err); + + auto consumer = asapo::ConsumerFactory::CreateConsumer(endpoint, path_to_files, true, credentials, &err); + consumer->SetTimeout(5000); + auto group_id = consumer->GenerateNewGroupId(&err); + exit_if_error("Cannot create group id", err); + + asapo::MessageMeta mm; + asapo::MessageData data; + + // read_stream snippet_start + // we start with the 'default' stream (the first one) + std::string stream_name = "default"; + + do { + err = consumer->GetNext(group_id, &mm, &data, stream_name); + + if (err && err == asapo::ConsumerErrorTemplates::kStreamFinished) { + // when the stream finishes, we look for the info on the next stream + auto streams = consumer->GetStreamList("", asapo::StreamFilter::kAllStreams, &err); + // first, we find the stream with our name in the list of streams + auto stream = std::find_if(streams.begin(), streams.end(), [&stream_name](const asapo::StreamInfo & s) { + return s.name == stream_name; + }); + + // then we look if the field 'nextStream' is set and not empty + if (stream != streams.end() && !stream->next_stream.empty()) { + // if it's not, we continue with the next stream + stream_name = stream->next_stream; + std::cout << "Changing stream to the next one: " << stream_name << std::endl; + continue; + } else { + // otherwise we stop + std::cout << "stream finished" << std::endl; + break; + } + } + + if (err && err == asapo::ConsumerErrorTemplates::kEndOfStream) { + std::cout << "stream ended" << std::endl; + break; + } + exit_if_error("Cannot get next record", err); // snippet_end_remove + + std::cout << "Message #" << mm.id << ", message content: " << reinterpret_cast<char const*>(data.get()) << std::endl; + } while (1); + // read_stream snippet_end + + return EXIT_SUCCESS; +} diff --git a/docs/site/versioned_examples/version-22.03.0/cpp/pipeline.cpp b/docs/site/versioned_examples/version-22.03.0/cpp/pipeline.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c63c7ee5f41348d3726cc4715f015c3b61840f24 --- /dev/null +++ b/docs/site/versioned_examples/version-22.03.0/cpp/pipeline.cpp @@ -0,0 +1,100 @@ +#include "asapo/asapo_producer.h" +#include "asapo/asapo_consumer.h" +#include <iostream> + +void ProcessAfterSend(asapo::RequestCallbackPayload payload, asapo::Error err) { + if (err && err != asapo::ProducerErrorTemplates::kServerWarning) { + std::cerr << "error during send: " << err << std::endl; + return; + } else if (err) { + std::cout << "warning during send: " << err << std::endl; + } else { + std::cout << "successfuly send " << payload.original_header.Json() << std::endl; + return; + } +} + +void exit_if_error(std::string error_string, const asapo::Error& err) { + if (err) { + std::cerr << error_string << err << std::endl; + exit(EXIT_FAILURE); + } +} + +int main(int argc, char* 
argv[]) { + asapo::Error err; + + auto endpoint = "localhost:8400"; + auto beamtime = "asapo_test"; + + auto token = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJl" + "eHAiOjk1NzE3MTAyMTYsImp0aSI6ImMzaXFhbGpmN" + "DNhbGZwOHJua20wIiwic3ViIjoiYnRfYXNhcG9fdG" + "VzdCIsIkV4dHJhQ2xhaW1zIjp7IkFjY2Vzc1R5cGV" + "zIjpbIndyaXRlIiwicmVhZCJdfX0.dkWupPO-ysI4" + "t-jtWiaElAzDyJF6T7hu_Wz_Au54mYU"; + + auto path_to_files = "/var/tmp/asapo/global_shared/data/test_facility/gpfs/test/2019/data/asapo_test"; + + auto credentials = asapo::SourceCredentials{asapo::SourceType::kProcessed, beamtime, "", "test_source", token}; + + auto producer = asapo::Producer::Create(endpoint, 1, asapo::RequestHandlerType::kTcp, credentials, 60000, &err); + exit_if_error("Cannot start producer", err); + auto consumer = asapo::ConsumerFactory::CreateConsumer(endpoint, path_to_files, true, credentials, &err); + exit_if_error("Cannot start consumer", err); + consumer->SetTimeout(5000); + auto group_id = consumer->GenerateNewGroupId(&err); + exit_if_error("Cannot create group id", err); + + // pipeline snippet_start + // put the processed message into the new stream + auto pipelined_stream_name = "pipelined"; + + asapo::MessageMeta mm; + asapo::MessageData data; + + do { + // we expect the message to be in the 'default' stream already + err = consumer->GetNext(group_id, &mm, &data, "default"); + + if (err && err == asapo::ConsumerErrorTemplates::kStreamFinished) { + std::cout << "stream finished" << std::endl; + break; + } + + if (err && err == asapo::ConsumerErrorTemplates::kEndOfStream) { + std::cout << "stream ended" << std::endl; + break; + } + exit_if_error("Cannot get next record", err); // snippet_end_remove + + // work on our data + auto processed_string = std::string(reinterpret_cast<char const*>(data.get())) + " processed"; + auto send_size = processed_string.size() + 1; + auto buffer = asapo::MessageData(new uint8_t[send_size]); + memcpy(buffer.get(), processed_string.c_str(), send_size); + + // you may use the same filename, if you want to rewrite the source file. 
This will result in warning, but it is a valid usecase + asapo::MessageHeader message_header{mm.id, send_size, std::string("processed/test_file_") + std::to_string(mm.id)}; + err = producer->Send(message_header, std::move(buffer), asapo::kDefaultIngestMode, pipelined_stream_name, &ProcessAfterSend); + exit_if_error("Cannot send message", err); // snippet_end_remove + } while (1); + // pipeline snippet_end + + + err = producer->WaitRequestsFinished(2000); + exit_if_error("Producer exit on timeout", err); + + // finish snippet_start + // the meta from the last iteration corresponds to the last message + auto last_id = mm.id; + + err = producer->SendStreamFinishedFlag("pipelined",last_id, "", &ProcessAfterSend); + // finish snippet_end + exit_if_error("Cannot finish stream", err); + + // you can remove the source stream if you do not need it anymore + err = consumer->DeleteStream("default", asapo::DeleteStreamOptions{true, true}); + + return EXIT_SUCCESS; +} diff --git a/docs/site/versioned_examples/version-22.03.0/cpp/produce.cpp b/docs/site/versioned_examples/version-22.03.0/cpp/produce.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d47f324c38f30b2ce209500b02c18a4a0a0cf588 --- /dev/null +++ b/docs/site/versioned_examples/version-22.03.0/cpp/produce.cpp @@ -0,0 +1,88 @@ +#include "asapo/asapo_producer.h" +#include <iostream> + +// callback snippet_start +void ProcessAfterSend(asapo::RequestCallbackPayload payload, asapo::Error err) { + if (err && err != asapo::ProducerErrorTemplates::kServerWarning) { + // the data was not sent. Something is terribly wrong. + std::cerr << "error during send: " << err << std::endl; + return; + } else if (err) { + // The data was sent, but there was some unexpected problem, e.g. the file was overwritten. + std::cout << "warning during send: " << err << std::endl; + } else { + // all fine + std::cout << "successfuly send " << payload.original_header.Json() << std::endl; + return; + } +} +// callback snippet_end + +void exit_if_error(std::string error_string, const asapo::Error& err) { + if (err) { + std::cerr << error_string << err << std::endl; + exit(EXIT_FAILURE); + } +} + +int main(int argc, char* argv[]) { +// create snippet_start + asapo::Error err; + + auto endpoint = "localhost:8400"; + auto beamtime = "asapo_test"; + + auto credentials = asapo::SourceCredentials + { + "test_producer_instance", // # producer instance id (can be "auto") + "pipeline_step_1", // # pipeline step id + asapo::SourceType::kProcessed, // should be kProcessed or kRaw, kProcessed writes to the core FS + beamtime, // the folder should exist + "", // can be empty or "auto", if beamtime_id is given + "test_source", // source + "" // authorization token + }; + + auto producer = asapo::Producer::Create(endpoint, + 1, // number of threads. Increase, if the sending speed seems slow + asapo::RequestHandlerType::kTcp, // Use kTcp. Use kFilesystem for direct storage of files + credentials, + 60000, // timeout. Do not change. + &err); +// create snippet_end + exit_if_error("Cannot start producer", err); + +// send snippet_start + // the message must be manually copied to the buffer of the relevant size + std::string to_send = "hello"; + auto send_size = to_send.size() + 1; + auto buffer = asapo::MessageData(new uint8_t[send_size]); + memcpy(buffer.get(), to_send.c_str(), send_size); + + // we are sending a message with with index 1. 
Filename must start with processed/ + asapo::MessageHeader message_header{1, send_size, "processed/test_file"}; + // use the default stream + err = producer->Send(message_header, std::move(buffer), asapo::kDefaultIngestMode, "default", &ProcessAfterSend); +// send snippet_end + exit_if_error("Cannot send message", err); + + // send data in loop + + // add the following at the end of the script + +// finish snippet_start + err = producer->WaitRequestsFinished(2000); // will synchronously wait for all the data to be sent. + // Use it when no more data is expected. + exit_if_error("Producer exit on timeout", err); // snippet_end_remove + + // you may want to mark the stream as finished + err = producer->SendStreamFinishedFlag("default", // name of the stream. + 1, // the number of the last message in the stream + "", // next stream or empty + &ProcessAfterSend); + exit_if_error("Cannot finish stream", err); // snippet_end_remove + std::cout << "stream finished" << std::endl; +// finish snippet_end + + return EXIT_SUCCESS; +} diff --git a/docs/site/versioned_examples/version-22.03.0/cpp/produce_dataset.cpp b/docs/site/versioned_examples/version-22.03.0/cpp/produce_dataset.cpp new file mode 100644 index 0000000000000000000000000000000000000000..3de79fa422f994b8aeaf5eacc27f6903380cfa91 --- /dev/null +++ b/docs/site/versioned_examples/version-22.03.0/cpp/produce_dataset.cpp @@ -0,0 +1,75 @@ +#include "asapo/asapo_producer.h" +#include <iostream> + +void ProcessAfterSend(asapo::RequestCallbackPayload payload, asapo::Error err) { + if (err && err != asapo::ProducerErrorTemplates::kServerWarning) { + std::cerr << "error during send: " << err << std::endl; + return; + } else if (err) { + std::cout << "warning during send: " << err << std::endl; + } else { + std::cout << "successfuly send " << payload.original_header.Json() << std::endl; + return; + } +} + +void exit_if_error(std::string error_string, const asapo::Error& err) { + if (err) { + std::cerr << error_string << err << std::endl; + exit(EXIT_FAILURE); + } +} + +int main(int argc, char* argv[]) { + asapo::Error err; + + auto endpoint = "localhost:8400"; + auto beamtime = "asapo_test"; + + auto credentials = asapo::SourceCredentials{asapo::SourceType::kProcessed, beamtime, "", "test_source", ""}; + + auto producer = asapo::Producer::Create(endpoint, 1, asapo::RequestHandlerType::kTcp, credentials, 60000, &err); + exit_if_error("Cannot start producer", err); + + // dataset snippet_start + std::string to_send = "hello dataset 1"; + auto send_size = to_send.size() + 1; + auto buffer = asapo::MessageData(new uint8_t[send_size]); + memcpy(buffer.get(), to_send.c_str(), send_size); + + // add the additional paremeters to the header: part number in the dataset and the total number of parts + asapo::MessageHeader message_header{1, send_size, "processed/test_file_dataset_1", "", 1, 3}; + + err = producer->Send(message_header, std::move(buffer), asapo::kDefaultIngestMode, "default", &ProcessAfterSend); + exit_if_error("Cannot send message", err); // snippet_end_remove + + // this can be done from different producers in any order + // we do not recalculate send_size since we know it to be the same + // we reuse the header to shorten the code + to_send = "hello dataset 2"; + buffer = asapo::MessageData(new uint8_t[send_size]); + memcpy(buffer.get(), to_send.c_str(), send_size); + + message_header.dataset_substream = 2; + err = producer->Send(message_header, std::move(buffer), asapo::kDefaultIngestMode, "default", &ProcessAfterSend); + exit_if_error("Cannot 
send message", err); // snippet_end_remove + + to_send = "hello dataset 3"; + buffer = asapo::MessageData(new uint8_t[send_size]); + memcpy(buffer.get(), to_send.c_str(), send_size); + + message_header.dataset_substream = 3; + err = producer->Send(message_header, std::move(buffer), asapo::kDefaultIngestMode, "default", &ProcessAfterSend); + exit_if_error("Cannot send message", err); // snippet_end_remove + // dataset snippet_end + + err = producer->WaitRequestsFinished(2000); + exit_if_error("Producer exit on timeout", err); + + // the dataset parts are not counted towards the number of messages in the stream + // the last message id in this example is still 1 + err = producer->SendStreamFinishedFlag("default", 1, "", &ProcessAfterSend); + exit_if_error("Cannot finish stream", err); + + return EXIT_SUCCESS; +} diff --git a/docs/site/versioned_examples/version-22.03.0/cpp/query.cpp b/docs/site/versioned_examples/version-22.03.0/cpp/query.cpp new file mode 100644 index 0000000000000000000000000000000000000000..78370839e74ee95eb0b9f55fadfc5e7a77e2044e --- /dev/null +++ b/docs/site/versioned_examples/version-22.03.0/cpp/query.cpp @@ -0,0 +1,128 @@ +#include "asapo/asapo_producer.h" +#include "asapo/asapo_consumer.h" +#include <iostream> +#include <chrono> + +void ProcessAfterSend(asapo::RequestCallbackPayload payload, asapo::Error err) { + if (err && err != asapo::ProducerErrorTemplates::kServerWarning) { + std::cerr << "error during send: " << err << std::endl; + return; + } else if (err) { + std::cout << "warning during send: " << err << std::endl; + } else { + std::cout << "successfuly send " << payload.original_header.Json() << std::endl; + return; + } +} + +void PrintMessages(asapo::MessageMetas metas, std::unique_ptr<asapo::Consumer>& consumer) { + asapo::MessageData data; + asapo::Error err; + for (int i = 0; i < metas.size(); i++) { + err = consumer->RetrieveData(&metas[i], &data); + std::cout << "Message #" << metas[i].id + << ", content: " << reinterpret_cast<char const*>(data.get()) + << ", user metadata: " << metas[i].metadata << std::endl; + } +} + +void exit_if_error(std::string error_string, const asapo::Error& err) { + if (err) { + std::cerr << error_string << err << std::endl; + exit(EXIT_FAILURE); + } +} + +int main(int argc, char* argv[]) { + asapo::Error err; + + auto endpoint = "localhost:8400"; + auto beamtime = "asapo_test"; + + auto token = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJl" + "eHAiOjk1NzE3MTAyMTYsImp0aSI6ImMzaXFhbGpmN" + "DNhbGZwOHJua20wIiwic3ViIjoiYnRfYXNhcG9fdG" + "VzdCIsIkV4dHJhQ2xhaW1zIjp7IkFjY2Vzc1R5cGV" + "zIjpbIndyaXRlIiwicmVhZCJdfX0.dkWupPO-ysI4" + "t-jtWiaElAzDyJF6T7hu_Wz_Au54mYU"; + + auto path_to_files = "/var/tmp/asapo/global_shared/data/test_facility/gpfs/test/2019/data/asapo_test"; + + auto credentials = asapo::SourceCredentials{asapo::SourceType::kProcessed, beamtime, "", "test_source", token}; + + auto producer = asapo::Producer::Create(endpoint, 1, asapo::RequestHandlerType::kTcp, credentials, 60000, &err); + exit_if_error("Cannot start producer", err); + + producer->SetLogLevel(asapo::LogLevel::Error); + + err = producer->DeleteStream("default", 0, asapo::DeleteStreamOptions{true, true}); + exit_if_error("Cannot delete stream", err); + + // let's start with producing some messages with metadata + for (uint64_t i = 1; i <= 10; i++) { + auto message_metadata = "{" + " \"condition\": \"condition #" + std::to_string(i) + "\"," + " \"somevalue\": " + std::to_string(i * 10) + + "}"; + + std::string to_send = "message#" + std::to_string(i); + auto 
send_size = to_send.size() + 1; + auto buffer = asapo::MessageData(new uint8_t[send_size]); + memcpy(buffer.get(), to_send.c_str(), send_size); + + asapo::MessageHeader message_header{i, send_size, "processed/test_file_" + std::to_string(i), message_metadata}; + err = producer->Send(message_header, std::move(buffer), asapo::kDefaultIngestMode, "default", &ProcessAfterSend); + exit_if_error("Cannot send message", err); + } + + err = producer->WaitRequestsFinished(2000); + exit_if_error("Producer exit on timeout", err); + + auto consumer = asapo::ConsumerFactory::CreateConsumer(endpoint, path_to_files, true, credentials, &err); + exit_if_error("Cannot create group id", err); + consumer->SetTimeout(5000); + + // by_id snippet_start + // simple query, same as GetById + auto metadatas = consumer->QueryMessages("_id = 1", "default", &err); + // by_id snippet_end + exit_if_error("Cannot query messages", err); + std::cout << "Message with ID = 1" << std::endl; + PrintMessages(metadatas, consumer); + + // by_ids snippet_start + // the query that requests the range of IDs + metadatas = consumer->QueryMessages("_id >= 8", "default", &err); + // by_ids snippet_end + exit_if_error("Cannot query messages", err); + std::cout << "essages with ID >= 8" << std::endl; + PrintMessages(metadatas, consumer); + + // string_equal snippet_start + // the query that has some specific requirement for message metadata + metadatas = consumer->QueryMessages("meta.condition = \"condition #7\"", "default", &err); + // string_equal snippet_end + exit_if_error("Cannot query messages", err); + std::cout << "Message with condition = 'condition #7'" << std::endl; + PrintMessages(metadatas, consumer); + + // int_compare snippet_start + // the query that has several requirements for user metadata + metadatas = consumer->QueryMessages("meta.somevalue > 30 AND meta.somevalue < 60", "default", &err); + // int_compare snippet_end + exit_if_error("Cannot query messages", err); + std::cout << "Message with 30 < somevalue < 60" << std::endl; + PrintMessages(metadatas, consumer); + + // timestamp snippet_start + // the query that is based on the message's timestamp + auto now = std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::system_clock::now().time_since_epoch()).count(); + auto fifteen_minutes_ago = std::chrono::duration_cast<std::chrono::nanoseconds>((std::chrono::system_clock::now() - std::chrono::minutes(15)).time_since_epoch()).count(); + metadatas = consumer->QueryMessages("timestamp < " + std::to_string(now) + " AND timestamp > " + std::to_string(fifteen_minutes_ago), "default", &err); + // timestamp snippet_end + exit_if_error("Cannot query messages", err); + std::cout << "Messages in the last 15 minutes" << std::endl; + PrintMessages(metadatas, consumer); + + return EXIT_SUCCESS; +} diff --git a/docs/site/versioned_examples/version-22.03.0/install_cpp_clients.sh b/docs/site/versioned_examples/version-22.03.0/install_cpp_clients.sh new file mode 100644 index 0000000000000000000000000000000000000000..a4cee2bab3671490173714aeb0fefe7cb94f7789 --- /dev/null +++ b/docs/site/versioned_examples/version-22.03.0/install_cpp_clients.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +# you can also install Linux/Windows packages if you have root access (or install locally). +# take a look at http://nims.desy.de/extra/asapo/linux_packages/ or http://nims.desy.de/extra/asapo/windows10 for your OS. E.g. 
for Debian 10.7 +wget http://nims.desy.de/extra/asapo/linux_packages/debian10.7/asapo-dev-22.03.0-debian10.7.x86_64.deb +sudo apt install ./asapo-dev-22.03.0-debian10.7.x86_64.deb + + diff --git a/docs/site/versioned_examples/version-22.03.0/install_python_clients_pip.sh b/docs/site/versioned_examples/version-22.03.0/install_python_clients_pip.sh new file mode 100644 index 0000000000000000000000000000000000000000..7940d518bd19fb55c6cc557090cef6e72d05966a --- /dev/null +++ b/docs/site/versioned_examples/version-22.03.0/install_python_clients_pip.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env bash + +pip3 install --user --trusted-host nims.desy.de --find-links=http://nims.desy.de/extra/asapo/linux_wheels asapo_producer==22.3.0 +pip3 install --user --trusted-host nims.desy.de --find-links=http://nims.desy.de/extra/asapo/linux_wheels asapo_consumer==22.3.0 +# you might need to update pip if the above commands error: pip3 install --upgrade pip + +# if that does not work (abi incompatibility, etc) you may try to install source packages +# take a look at http://nims.desy.de/extra/asapo/linux_packages/ or http://nims.desy.de/extra/asapo/windows10 for your OS. E.g. for Debian 10.7 +# wget http://nims.desy.de/extra/asapo/linux_packages/debian10.7/asapo_producer-22.03.0.tar.gz +# wget http://nims.desy.de/extra/asapo/linux_packages/debian10.7/asapo_consumer-22.03.0.tar.gz + +# pip3 install asapo_producer-22.03.0.tar.gz +# pip3 install asapo_consumer-22.03.0.tar.gz diff --git a/docs/site/versioned_examples/version-22.03.0/install_python_clients_pkg.sh b/docs/site/versioned_examples/version-22.03.0/install_python_clients_pkg.sh new file mode 100644 index 0000000000000000000000000000000000000000..9e235cd1b528b5dc987de5d729ddcd7562efa4bf --- /dev/null +++ b/docs/site/versioned_examples/version-22.03.0/install_python_clients_pkg.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash + +# you can also install Linux/Windows packages if you have root access (or install locally). +# take a look at http://nims.desy.de/extra/asapo/linux_packages/ or http://nims.desy.de/extra/asapo/windows10 for your OS. E.g. 
for Debian 10.7 +wget http://nims.desy.de/extra/asapo/linux_packages/debian10.7/python-asapo-producer_22.03.0-debian10.7_amd64.deb +wget http://nims.desy.de/extra/asapo/linux_packages/debian10.7/python-asapo-consumer_22.03.0-debian10.7_amd64.deb + +sudo apt install ./python-asapo-producer_22.03.0-debian10.7_amd64.deb +sudo apt install ./python-asapo-consumer_22.03.0-debian10.7_amd64.deb diff --git a/docs/site/versioned_examples/version-22.03.0/python/acknowledgements.py b/docs/site/versioned_examples/version-22.03.0/python/acknowledgements.py new file mode 100644 index 0000000000000000000000000000000000000000..32d3055d059dd0e37d926dbf812bdf05694ef62d --- /dev/null +++ b/docs/site/versioned_examples/version-22.03.0/python/acknowledgements.py @@ -0,0 +1,74 @@ +import asapo_consumer +import asapo_producer + +def callback(payload,err): + if err is not None and not isinstance(err, asapo_producer.AsapoServerWarning): + print("could not send: ",payload,err) + elif err is not None: + print("sent with warning: ",payload,err) + else: + print("successfully sent: ",payload) + +endpoint = "localhost:8400" +beamtime = "asapo_test" + +token = str("eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.e" +"yJleHAiOjk1NzE3MTAyMTYsImp0aSI6ImMzaXFhbGpmNDNhbGZ" +"wOHJua20wIiwic3ViIjoiYnRfYXNhcG9fdGVzdCIsIkV4dHJhQ" +"2xhaW1zIjp7IkFjY2Vzc1R5cGVzIjpbIndyaXRlIiwicmVhZCJ" +"dfX0.dkWupPO-ysI4t-jtWiaElAzDyJF6T7hu_Wz_Au54mYU") + +path_to_files = "/var/tmp/asapo/global_shared/data/test_facility/gpfs/test/2019/data/asapo_test" + +producer = asapo_producer.create_producer(endpoint, 'processed', beamtime, 'auto', 'test_source', '', 1, 60000) +producer.set_log_level('error') + +# let's start with producing a sample of 10 simple messages +for i in range(1, 11): + producer.send(i, "processed/test_file_ack_" + str(i), ('message #' + str(i)).encode(), stream = "default", callback = callback) + +producer.wait_requests_finished(2000) + +consumer = asapo_consumer.create_consumer(endpoint, path_to_files, True, beamtime, "test_source", token, 5000) +group_id = consumer.generate_group_id() + +# a flag to mark the first attempt at message #3 +firstTryNegative = True + +# consume snippet_start +try: + while True: + data, meta = consumer.get_next(group_id, meta_only = False) + text_data = data.tobytes().decode("utf-8") + message_id = meta['_id'] + + # acknowledge all the messages except these + if message_id not in [3,5,7]: + print('Acknowledge the message #', message_id) + consumer.acknowledge(group_id, message_id) + + # for message #3 we issue a negative acknowledgement, which will put it at the next place in the stream + # in this case, it will be put at the end of the stream + if message_id == 3: + if firstTryNegative: + print('Negative acknowledgement of the message #', message_id) + # make the acknowledgement with a delay of 2 seconds + consumer.neg_acknowledge(group_id, message_id, delay_ms=2000) + firstTryNegative = False + else: + # on our second attempt we acknowledge the message + print('Second try of the message #', message_id) + consumer.acknowledge(group_id, message_id) + +except asapo_consumer.AsapoStreamFinishedError: + print('stream finished') + +except asapo_consumer.AsapoEndOfStreamError: + print('stream ended') +# consume snippet_end + +# print snippet_start +for message_id in consumer.get_unacknowledged_messages(group_id): + data, meta = consumer.get_by_id(message_id, meta_only = False) + print('Unacknowledged message:', data.tobytes().decode("utf-8"), meta) +# print snippet_end diff --git 
a/docs/site/versioned_examples/version-22.03.0/python/consume.py b/docs/site/versioned_examples/version-22.03.0/python/consume.py new file mode 100644 index 0000000000000000000000000000000000000000..1b095f6d3d72fc734a3d2208212e75efe8fb2d36 --- /dev/null +++ b/docs/site/versioned_examples/version-22.03.0/python/consume.py @@ -0,0 +1,58 @@ +import asapo_consumer + +#create snippet_start +endpoint = "localhost:8400" +beamtime = "asapo_test" + +# test token. In production it is created during the start of the beamtime +token = str("eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.e" +"yJleHAiOjk1NzE3MTAyMTYsImp0aSI6ImMzaXFhbGpmNDNhbGZ" +"wOHJua20wIiwic3ViIjoiYnRfYXNhcG9fdGVzdCIsIkV4dHJhQ" +"2xhaW1zIjp7IkFjY2Vzc1R5cGVzIjpbIndyaXRlIiwicmVhZCJ" +"dfX0.dkWupPO-ysI4t-jtWiaElAzDyJF6T7hu_Wz_Au54mYU") + +# set it according to your configuration. +path_to_files = "/var/tmp/asapo/global_shared/data/test_facility/gpfs/test/2019/data/asapo_test" + + +consumer = asapo_consumer \ + .create_consumer(endpoint, + path_to_files, + True, # True if the path_to_files is accessible locally, False otherwise + beamtime, # Same as for the producer + "test_source", # Same as for the producer + token, # Access token + 5000, # Timeout in ms. How long to wait for a message on a non-finished stream. + 'test_consumer_instance', # consumer instance id (can be 'auto') + 'pipeline_step_1' # pipeline step id + ) +#create snippet_end + +#list snippet_start +for stream in consumer.get_stream_list(): + print("Stream name: ", stream['name'], "\n", + "LastId: ", stream['lastId'], "\n", + "Stream finished: ", stream['finished'], "\n", + "Next stream: ", stream['nextStream']) +#list snippet_end + +#consume snippet_start +group_id = consumer.generate_group_id() # Several consumers can use the same group_id to process messages in parallel + +try: + # get_next is the main function to get messages from streams. You would normally call it in a loop. 
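 + # get_next blocks for up to the consumer timeout set above (5000 ms here) if the next message is not yet available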
+ # you can either manually compare the meta['_id'] to the stream['lastId'], or wait for the exception to happen + while True: + data, meta = consumer.get_next(group_id, meta_only = False) + print(data.tobytes().decode("utf-8"), meta) + +except asapo_consumer.AsapoStreamFinishedError: + print('stream finished') # all the messages in the stream were processed + +except asapo_consumer.AsapoEndOfStreamError: + print('stream ended') # not-finished stream timeout, or wrong or empty stream +#consume snippet_end + +#delete snippet_start +consumer.delete_stream(error_on_not_exist = True) # you can delete the stream after consuming +#delete snippet_end diff --git a/docs/site/versioned_examples/version-22.03.0/python/consume_dataset.py b/docs/site/versioned_examples/version-22.03.0/python/consume_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..8ed7711d784c5e8b5ef65f99cdeed5846d5ebbec --- /dev/null +++ b/docs/site/versioned_examples/version-22.03.0/python/consume_dataset.py @@ -0,0 +1,36 @@ +import asapo_consumer + +endpoint = "localhost:8400" +beamtime = "asapo_test" + +token = str("eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.e" +"yJleHAiOjk1NzE3MTAyMTYsImp0aSI6ImMzaXFhbGpmNDNhbGZ" +"wOHJua20wIiwic3ViIjoiYnRfYXNhcG9fdGVzdCIsIkV4dHJhQ" +"2xhaW1zIjp7IkFjY2Vzc1R5cGVzIjpbIndyaXRlIiwicmVhZCJ" +"dfX0.dkWupPO-ysI4t-jtWiaElAzDyJF6T7hu_Wz_Au54mYU") + +path_to_files = "/var/tmp/asapo/global_shared/data/test_facility/gpfs/test/2019/data/asapo_test" + +consumer = asapo_consumer.create_consumer(endpoint, path_to_files, True, beamtime, "test_source", token, 5000) + +group_id = consumer.generate_group_id() + +# dataset snippet_start +try: + # get_next_dataset behaves similarly to the regular get_next + while True: + dataset = consumer.get_next_dataset(group_id, stream = 'default') + print ('Dataset Id:', dataset['id']) + # the initial response only contains the metadata + # the actual content should be retrieved separately + for metadata in dataset['content']: + data = consumer.retrieve_data(metadata) + print ('Part ' + str(metadata['dataset_substream']) + ' out of ' + str(dataset['expected_size'])) + print (data.tobytes().decode("utf-8"), metadata) + +except asapo_consumer.AsapoStreamFinishedError: + print('stream finished') + +except asapo_consumer.AsapoEndOfStreamError: + print('stream ended') +# dataset snippet_end diff --git a/docs/site/versioned_examples/version-22.03.0/python/metadata.py b/docs/site/versioned_examples/version-22.03.0/python/metadata.py new file mode 100644 index 0000000000000000000000000000000000000000..11346dc0890c59e9950655fc2e8e1fec0c6331a6 --- /dev/null +++ b/docs/site/versioned_examples/version-22.03.0/python/metadata.py @@ -0,0 +1,128 @@ +import asapo_consumer +import asapo_producer + +import json + +def callback(payload,err): + if err is not None and not isinstance(err, asapo_producer.AsapoServerWarning): + print("could not send: ",payload,err) + elif err is not None: + print("sent with warning: ",payload,err) + else: + print("successfully sent: ",payload) + +endpoint = "localhost:8400" +beamtime = "asapo_test" + +token = str("eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.e" +"yJleHAiOjk1NzE3MTAyMTYsImp0aSI6ImMzaXFhbGpmNDNhbGZ" +"wOHJua20wIiwic3ViIjoiYnRfYXNhcG9fdGVzdCIsIkV4dHJhQ" +"2xhaW1zIjp7IkFjY2Vzc1R5cGVzIjpbIndyaXRlIiwicmVhZCJ" +"dfX0.dkWupPO-ysI4t-jtWiaElAzDyJF6T7hu_Wz_Au54mYU") + +path_to_files = "/var/tmp/asapo/global_shared/data/test_facility/gpfs/test/2019/data/asapo_test" + +producer = 
asapo_producer.create_producer(endpoint, 'processed', beamtime, 'auto', 'test_source', '', 1, 60000) +producer.set_log_level('error') + +# beamtime_set snippet_start +# sample beamtime metadata. You can add any data you want, with any level of complexity +# in this example we use strings and ints, and one nested structure +beamtime_metadata = { + 'name': 'beamtime name', + 'condition': 'beamtime condition', + 'intvalue1': 5, + 'intvalue2': 10, + 'structure': { + 'structint1': 20, + 'structint2': 30 + } +} + +# send the metadata +# by default the new metadata will completely replace the one that's already there +producer.send_beamtime_meta(json.dumps(beamtime_metadata), callback = callback) +# beamtime_set snippet_end + +# beamtime_update snippet_start +# we can update the existing metadata if we want, by modifying the existing fields, or adding new ones +beamtime_metadata_update = { + 'condition': 'updated beamtime condition', + 'newintvalue': 15 +} + +# send the metadata in the 'update' mode +producer.send_beamtime_meta(json.dumps(beamtime_metadata_update), mode = 'update', callback = callback) +# beamtime_update snippet_end + +# stream_set snippet_start +# sample stream metadata +stream_metadata = { + 'name': 'stream name', + 'condition': 'stream condition', + 'intvalue': 44 +} + +# works the same way: by default we replace the stream metadata, but update is also possible +# update works exactly the same as for beamtime, but here we will only do 'replace' +producer.send_stream_meta(json.dumps(stream_metadata), callback = callback) +# stream_set snippet_end + +# message_set snippet_start +# sample message metadata +message_metadata = { + 'name': 'message name', + 'condition': 'message condition', + 'somevalue': 55 +} + +# the message metadata is sent together with the message itself +# in case of datasets each part has its own metadata +producer.send(1, "processed/test_file", b'hello', user_meta = json.dumps(message_metadata), stream = "default", callback = callback) +# message_set snippet_end + +producer.wait_requests_finished(2000) + +consumer = asapo_consumer.create_consumer(endpoint, path_to_files, True, beamtime, "test_source", token, 5000) + +# beamtime_get snippet_start +# read the beamtime metadata +beamtime_metadata_read = consumer.get_beamtime_meta() + +# the structure is the same as the one that was sent, and the updated values are already there +print('Name:', beamtime_metadata_read['name']) +print('Condition:', beamtime_metadata_read['condition']) +print('Updated value exists:', 'newintvalue' in beamtime_metadata_read) +print('Sum of int values:', beamtime_metadata_read['intvalue1'] + beamtime_metadata_read['intvalue2']) +print('Nested structure value', beamtime_metadata_read['structure']['structint1']) +# beamtime_get snippet_end + +# stream_get snippet_start +# read the stream metadata +stream_metadata_read = consumer.get_stream_meta(stream = 'default') + +# access various fields from it +print('Stream Name:', stream_metadata_read['name']) +print('Stream Condition:', stream_metadata_read['condition']) +print('Stream int value:', stream_metadata_read['intvalue']) +# stream_get snippet_end + +group_id = consumer.generate_group_id() +try: + while True: + # message_get snippet_start + # right now we are only interested in metadata + data, meta = consumer.get_next(group_id, meta_only = True) + print('Message #', meta['_id']) + + # our custom metadata is stored inside the message metadata + message_metadata_read = meta['meta'] + print('Message Name:', 
message_metadata_read['name']) + print('Message Condition:', message_metadata_read['condition']) + print('Message int value:', message_metadata_read['somevalue']) + # message_get snippet_end +except asapo_consumer.AsapoStreamFinishedError: + print('stream finished') + +except asapo_consumer.AsapoEndOfStreamError: + print('stream ended') diff --git a/docs/site/versioned_examples/version-22.03.0/python/next_stream.py b/docs/site/versioned_examples/version-22.03.0/python/next_stream.py new file mode 100644 index 0000000000000000000000000000000000000000..d88638185b1d64f81023461d59111409c2c40af6 --- /dev/null +++ b/docs/site/versioned_examples/version-22.03.0/python/next_stream.py @@ -0,0 +1,72 @@ +import asapo_consumer +import asapo_producer + +def callback(payload,err): + if err is not None and not isinstance(err, asapo_producer.AsapoServerWarning): + print("could not send: ",payload,err) + elif err is not None: + print("sent with warning: ",payload,err) + else: + print("successfuly sent: ",payload) + +endpoint = "localhost:8400" +beamtime = "asapo_test" + +token = str("eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.e" +"yJleHAiOjk1NzE3MTAyMTYsImp0aSI6ImMzaXFhbGpmNDNhbGZ" +"wOHJua20wIiwic3ViIjoiYnRfYXNhcG9fdGVzdCIsIkV4dHJhQ" +"2xhaW1zIjp7IkFjY2Vzc1R5cGVzIjpbIndyaXRlIiwicmVhZCJ" +"dfX0.dkWupPO-ysI4t-jtWiaElAzDyJF6T7hu_Wz_Au54mYU") + +path_to_files = "/var/tmp/asapo/global_shared/data/test_facility/gpfs/test/2019/data/asapo_test" + +producer = asapo_producer.create_producer(endpoint, 'processed', beamtime, 'auto', 'test_source', '', 1, 60000) +producer.set_log_level('error') + +# let's start with producing a sample of 10 simple messages +for i in range(1, 11): + producer.send(i, "processed/test_file_" + str(i), ('content of the message #' + str(i)).encode(), stream = 'default', callback = callback) + +# next_stream_set snippet_start +# finish the stream and set the next stream to be called 'next' +producer.send_stream_finished_flag('default', i, next_stream = 'next', callback = callback) +# next_stream_set snippet_end + +# populate the 'next' stream as well +for i in range(1, 6): + producer.send(i, "processed/test_file_next_" + str(i), ('content of the message #' + str(i)).encode(), stream = 'next', callback = callback) + +# we leave the 'next' stream unfinished, but the chain of streams can be of any length + +producer.wait_requests_finished(2000) + +consumer = asapo_consumer.create_consumer(endpoint, path_to_files, True, beamtime, "test_source", token, 5000) +group_id = consumer.generate_group_id() + +# read_stream snippet_start +# we start with the 'default' stream (the first one) +stream_name = 'default' + +while True: + try: + data, meta = consumer.get_next(group_id, meta_only = False, stream = stream_name) + text_data = data.tobytes().decode("utf-8") + message_id = meta['_id'] + print('Message #', message_id, ':', text_data) + except asapo_consumer.AsapoStreamFinishedError: + # when the stream finishes, we look for the info on the next stream + # first, we find the stream with our name in the list of streams + stream = next(s for s in consumer.get_stream_list() if s['name'] == stream_name) + # then we look if the field 'nextStream' is set and not empty + if 'nextStream' in stream and stream['nextStream']: + # if it's not, we continue with the next stream + stream_name = stream['nextStream'] + print('Changing stream to the next one:', stream_name) + continue + # otherwise we stop + print('stream finished') + break + except asapo_consumer.AsapoEndOfStreamError: + print('stream ended') + break +# 
read_stream snippet_end diff --git a/docs/site/versioned_examples/version-22.03.0/python/pipeline.py b/docs/site/versioned_examples/version-22.03.0/python/pipeline.py new file mode 100644 index 0000000000000000000000000000000000000000..c2f8152b09dc6951db8a46dcb6396944e6cb1a71 --- /dev/null +++ b/docs/site/versioned_examples/version-22.03.0/python/pipeline.py @@ -0,0 +1,62 @@ +import asapo_consumer +import asapo_producer + +def callback(payload,err): + if err is not None and not isinstance(err, asapo_producer.AsapoServerWarning): + print("could not send: ",payload,err) + elif err is not None: + print("sent with warning: ",payload,err) + else: + print("successfully sent: ",payload) + +endpoint = "localhost:8400" +beamtime = "asapo_test" + +token = str("eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.e" +"yJleHAiOjk1NzE3MTAyMTYsImp0aSI6ImMzaXFhbGpmNDNhbGZ" +"wOHJua20wIiwic3ViIjoiYnRfYXNhcG9fdGVzdCIsIkV4dHJhQ" +"2xhaW1zIjp7IkFjY2Vzc1R5cGVzIjpbIndyaXRlIiwicmVhZCJ" +"dfX0.dkWupPO-ysI4t-jtWiaElAzDyJF6T7hu_Wz_Au54mYU") + +path_to_files = "/var/tmp/asapo/global_shared/data/test_facility/gpfs/test/2019/data/asapo_test" + +consumer = asapo_consumer.create_consumer(endpoint, path_to_files, True, beamtime, "test_source", token, 5000) + +producer = asapo_producer.create_producer(endpoint, 'processed', beamtime, 'auto', 'test_source', '', 1, 60000) + +group_id = consumer.generate_group_id() +# pipeline snippet_start +# put the processed message into the new stream +pipelined_stream_name = 'pipelined' + +try: + while True: + # we expect the message to be in the 'default' stream already + data, meta = consumer.get_next(group_id, meta_only = False) + message_id = meta['_id'] + + # work on our data + text_data = data.tobytes().decode("utf-8") + pipelined_message = (text_data + ' processed').encode() + + # you may use the same filename, if you want to rewrite the source file. This will result in a warning, but it is a valid use case + producer.send(message_id, "processed/test_file_" + str(message_id), pipelined_message, stream = pipelined_stream_name, callback = callback) + + +except asapo_consumer.AsapoStreamFinishedError: + print('stream finished') + +except asapo_consumer.AsapoEndOfStreamError: + print('stream ended') +# pipeline snippet_end +producer.wait_requests_finished(2000) + +# finish snippet_start +# the meta from the last iteration corresponds to the last message +last_id = meta['_id'] + +producer.send_stream_finished_flag("pipelined", last_id) +# finish snippet_end + +# you can remove the source stream if you do not need it anymore +consumer.delete_stream(stream = 'default', error_on_not_exist = True) diff --git a/docs/site/versioned_examples/version-22.03.0/python/produce.py b/docs/site/versioned_examples/version-22.03.0/python/produce.py new file mode 100644 index 0000000000000000000000000000000000000000..badc65110e05d525fec61adb2818efc4c44fb0c3 --- /dev/null +++ b/docs/site/versioned_examples/version-22.03.0/python/produce.py @@ -0,0 +1,54 @@ +import asapo_producer + +# callback snippet_start +def callback(payload,err): + if err is not None and not isinstance(err, asapo_producer.AsapoServerWarning): + # the data was not sent. Something is terribly wrong. + print("could not send: ",payload,err) + elif err is not None: + # The data was sent, but there was some unexpected problem, e.g. the file was overwritten. 
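 + # such warnings arrive as asapo_producer.AsapoServerWarning, which the isinstance check above keeps out of the hard-error branch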
+ print("sent with warning: ",payload,err) + else: + # all fine + print("successfuly sent: ",payload) +# callback snippet_end + +# create snippet_start +endpoint = "localhost:8400" +beamtime = "asapo_test" + +producer = asapo_producer \ + .create_producer(endpoint, + 'processed', # should be 'processed' or 'raw', 'processed' writes to the core FS + beamtime, # the folder should exist + 'auto', # can be 'auto', if beamtime_id is given + 'test_source', # source + '', # authorization token + 1, # number of threads. Increase, if the sending speed seems slow + 60000, # timeout. Do not change. + 'test_producer_instance', # producer instance id (can be 'auto') + 'pipeline_step_1' # pipeline step id + ) + +producer.set_log_level("error") # other values are "warning", "info" or "debug". +# create snippet_end + +# send snippet_start +# we are sending a message with with index 1 to the default stream. Filename must start with processed/ +producer.send(1, # message number. Should be unique and ordered. + "processed/test_file", # name of the file. Should be unique, or it will be overwritten + b"hello", # binary data + callback = callback) # callback +# send snippet_end +# send data in loop + +# add the following at the end of the script + +# finish snippet_start +producer.wait_requests_finished(2000) # will synchronously wait for all the data to be sent. + # Use it when no more data is expected. + +# you may want to mark the stream as finished +producer.send_stream_finished_flag("default", # name of the stream. If you didn't specify the stream in 'send', it would be 'default' + 1) # the number of the last message in the stream +# finish snippet_end diff --git a/docs/site/versioned_examples/version-22.03.0/python/produce_dataset.py b/docs/site/versioned_examples/version-22.03.0/python/produce_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..106229c430b979bba1e547fcc1fca4c1de4a7eb1 --- /dev/null +++ b/docs/site/versioned_examples/version-22.03.0/python/produce_dataset.py @@ -0,0 +1,29 @@ +import asapo_producer + +def callback(payload,err): + if err is not None and not isinstance(err, asapo_producer.AsapoServerWarning): + print("could not send: ",payload,err) + elif err is not None: + print("sent with warning: ",payload,err) + else: + print("successfuly sent: ",payload) + +endpoint = "localhost:8400" +beamtime = "asapo_test" + +producer = asapo_producer.create_producer(endpoint, 'processed', beamtime, 'auto', 'test_source', '', 1, 60000) + +# dataset snippet_start +#assuming we have three different producers for a single dataset + +# add the additional 'dataset' paremeter, which should be (<part_number>, <total_parts_in_dataset>) +producer.send(1, "processed/test_file_dataset_1", b"hello dataset 1", dataset = (1,3), callback = callback) +# this can be done from different producers in any order +producer.send(1, "processed/test_file_dataset_1", b"hello dataset 2", dataset = (2,3), callback = callback) +producer.send(1, "processed/test_file_dataset_1", b"hello dataset 3", dataset = (3,3), callback = callback) +# dataset snippet_end + +producer.wait_requests_finished(2000) +# the dataset parts are not counted towards the number of messages in the stream +# the last message id in this example is still 1 +producer.send_stream_finished_flag("default", 1) diff --git a/docs/site/versioned_examples/version-22.03.0/python/query.py b/docs/site/versioned_examples/version-22.03.0/python/query.py new file mode 100644 index 
0000000000000000000000000000000000000000..83cfb51c9ef8dc00cd8e518fd5869f6e557453f5 --- /dev/null +++ b/docs/site/versioned_examples/version-22.03.0/python/query.py @@ -0,0 +1,86 @@ +import asapo_consumer +import asapo_producer + +import json +from datetime import datetime, timedelta + +def callback(payload,err): + if err is not None and not isinstance(err, asapo_producer.AsapoServerWarning): + print("could not send: ",payload,err) + elif err is not None: + print("sent with warning: ",payload,err) + else: + print("successfuly sent: ",payload) + +endpoint = "localhost:8400" +beamtime = "asapo_test" + +token = str("eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.e" +"yJleHAiOjk1NzE3MTAyMTYsImp0aSI6ImMzaXFhbGpmNDNhbGZ" +"wOHJua20wIiwic3ViIjoiYnRfYXNhcG9fdGVzdCIsIkV4dHJhQ" +"2xhaW1zIjp7IkFjY2Vzc1R5cGVzIjpbIndyaXRlIiwicmVhZCJ" +"dfX0.dkWupPO-ysI4t-jtWiaElAzDyJF6T7hu_Wz_Au54mYU") + +path_to_files = "/var/tmp/asapo/global_shared/data/test_facility/gpfs/test/2019/data/asapo_test" + +producer = asapo_producer.create_producer(endpoint, 'processed', beamtime, 'auto', 'test_source', '', 1, 60000) +producer.set_log_level('error') + +# let's start with producing some messages with metadata +for i in range(1, 11): + metadata = { + 'condition': 'condition #' + str(i), + 'somevalue': i * 10 + } + producer.send(i, "processed/test_file_" + str(i), ('message #' + str(i)).encode(), user_meta = json.dumps(metadata), stream = "default", callback = callback) + +producer.wait_requests_finished(2000) + +consumer = asapo_consumer.create_consumer(endpoint, path_to_files, True, beamtime, "test_source", token, 5000) + +# helper function to print messages +def print_messages(metadatas): + # the query will return the list of metadatas + for meta in metadatas: + # for each metadata we need to obtain the actual message first + data = consumer.retrieve_data(meta) + print('Message #', meta['_id'], ', content:', data.tobytes().decode("utf-8"), ', usermetadata:', meta['meta']) + +# by_id snippet_start +# simple query, same as get_by_id +metadatas = consumer.query_messages('_id = 1') +# by_id snippet_end +print('Message with ID = 1') +print_messages(metadatas) + +# by_ids snippet_start +# the query that requests the range of IDs +metadatas = consumer.query_messages('_id >= 8') +# by_ids snippet_end +print('Messages with ID >= 8') +print_messages(metadatas) + +# string_equal snippet_start +# the query that has some specific requirement for message metadata +metadatas = consumer.query_messages('meta.condition = "condition #7"') +# string_equal snippet_end +print('Message with condition = "condition #7"') +print_messages(metadatas) + +# int_compare snippet_start +# the query that has several requirements for user metadata +metadatas = consumer.query_messages('meta.somevalue > 30 AND meta.somevalue < 60') +# int_compare snippet_end +print('Message with 30 < somevalue < 60') +print_messages(metadatas) + +# timestamp snippet_start +# the query that is based on the message's timestamp +now = datetime.now() +fifteen_minutes_ago = now - timedelta(minutes = 15) +# python uses timestamp in seconds, while ASAP::O in nanoseconds, so we need to multiply it by a billion +metadatas = consumer.query_messages('timestamp < {} AND timestamp > {}'.format(now.timestamp() * 10**9, fifteen_minutes_ago.timestamp() * 10**9)) +# timestamp snippet_end +print('Messages in the last 15 minutes') +print_messages(metadatas) + diff --git a/docs/site/versioned_examples/version-22.03.0/start_asapo_socket.sh b/docs/site/versioned_examples/version-22.03.0/start_asapo_socket.sh 
new file mode 100644 index 0000000000000000000000000000000000000000..eed71b89a9cbc796c6ee90fb49529429d1b4dff0 --- /dev/null +++ b/docs/site/versioned_examples/version-22.03.0/start_asapo_socket.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash + +set -e + +ASAPO_HOST_DIR=/var/tmp/asapo # you can change this if needed, make sure there is enough space ( >3GB on disk) + +NOMAD_ALLOC_HOST_SHARED=$ASAPO_HOST_DIR/container_host_shared/nomad_alloc +SERVICE_DATA_CLUSTER_SHARED=$ASAPO_HOST_DIR/asapo_cluster_shared/service_data +DATA_GLOBAL_SHARED=$ASAPO_HOST_DIR/global_shared/data +DATA_GLOBAL_SHARED_ONLINE=$ASAPO_HOST_DIR/global_shared/online_data +MONGO_DIR=$SERVICE_DATA_CLUSTER_SHARED/mongodb + +ASAPO_USER=`id -u`:`id -g` + +mkdir -p $NOMAD_ALLOC_HOST_SHARED $SERVICE_DATA_CLUSTER_SHARED $DATA_GLOBAL_SHARED $DATA_GLOBAL_SHARED_ONLINE +chmod 777 $NOMAD_ALLOC_HOST_SHARED $SERVICE_DATA_CLUSTER_SHARED $DATA_GLOBAL_SHARED $DATA_GLOBAL_SHARED_ONLINE + +cd $SERVICE_DATA_CLUSTER_SHARED +mkdir -p fluentd grafana influxdb influxdb2 mongodb prometheus alertmanager +chmod 777 * + +docker run --privileged --rm -v /var/run/docker.sock:/var/run/docker.sock \ + -u $ASAPO_USER \ + --group-add `getent group docker | cut -d: -f3` \ + -v $NOMAD_ALLOC_HOST_SHARED:$NOMAD_ALLOC_HOST_SHARED \ + -v $SERVICE_DATA_CLUSTER_SHARED:$SERVICE_DATA_CLUSTER_SHARED \ + -v $DATA_GLOBAL_SHARED:$DATA_GLOBAL_SHARED \ + -e NOMAD_ALLOC_DIR=$NOMAD_ALLOC_HOST_SHARED \ + -e TF_VAR_service_dir=$SERVICE_DATA_CLUSTER_SHARED \ + -e TF_VAR_online_dir=$DATA_GLOBAL_SHARED_ONLINE \ + -e TF_VAR_offline_dir=$DATA_GLOBAL_SHARED \ + -e TF_VAR_mongo_dir=$MONGO_DIR \ + -e TF_VAR_asapo_user=$ASAPO_USER \ + -e ACL_ENABLED=true \ + --name asapo --net=host -d yakser/asapo-cluster:22.03.0 + +sleep 15 +docker exec asapo jobs-start diff --git a/docs/site/versioned_examples/version-22.03.0/start_asapo_tcp.sh b/docs/site/versioned_examples/version-22.03.0/start_asapo_tcp.sh new file mode 100644 index 0000000000000000000000000000000000000000..293f7f21580446e2e71069758515392723dfd60e --- /dev/null +++ b/docs/site/versioned_examples/version-22.03.0/start_asapo_tcp.sh @@ -0,0 +1,47 @@ +#!/usr/bin/env bash + +set -e + +ASAPO_HOST_DIR=/var/tmp/asapo # you can change this if needed, make sure there is enough space ( >3GB on disk) +# change this according to your Docker configuration +DOCKER_ENDPOINT="127.0.0.1:2376" +DOCKER_TLS_CA=/usr/local/docker/certs/$USER/ca.pem +DOCKER_TLS_KEY=/usr/local/docker/certs/$USER/key.pem +DOCKER_TLS_CERT=/usr/local/docker/certs/$USER/cert.pem + + +NOMAD_ALLOC_HOST_SHARED=$ASAPO_HOST_DIR/container_host_shared/nomad_alloc +SERVICE_DATA_CLUSTER_SHARED=$ASAPO_HOST_DIR/asapo_cluster_shared/service_data +DATA_GLOBAL_SHARED=$ASAPO_HOST_DIR/global_shared/data +DATA_GLOBAL_SHARED_ONLINE=$ASAPO_HOST_DIR/global_shared/online_data +MONGO_DIR=$SERVICE_DATA_CLUSTER_SHARED/mongodb + +ASAPO_USER=`id -u`:`id -g` + +mkdir -p $NOMAD_ALLOC_HOST_SHARED $SERVICE_DATA_CLUSTER_SHARED $DATA_GLOBAL_SHARED $DATA_GLOBAL_SHARED_ONLINE +chmod 777 $NOMAD_ALLOC_HOST_SHARED $SERVICE_DATA_CLUSTER_SHARED $DATA_GLOBAL_SHARED $DATA_GLOBAL_SHARED_ONLINE + +cd $SERVICE_DATA_CLUSTER_SHARED +mkdir -p fluentd grafana influxdb influxdb2 mongodb prometheus alertmanager +chmod 777 * + +docker run --privileged --userns=host --security-opt no-new-privileges --rm \ + -u $ASAPO_USER \ + -v $NOMAD_ALLOC_HOST_SHARED:$NOMAD_ALLOC_HOST_SHARED \ + -v $SERVICE_DATA_CLUSTER_SHARED:$SERVICE_DATA_CLUSTER_SHARED \ + -v $DATA_GLOBAL_SHARED:$DATA_GLOBAL_SHARED \ + -e 
NOMAD_ALLOC_DIR=$NOMAD_ALLOC_HOST_SHARED \ + -e TF_VAR_service_dir=$SERVICE_DATA_CLUSTER_SHARED \ + -e TF_VAR_online_dir=$DATA_GLOBAL_SHARED_ONLINE \ + -e TF_VAR_offline_dir=$DATA_GLOBAL_SHARED \ + -e TF_VAR_mongo_dir=$MONGO_DIR \ + -e TF_VAR_asapo_user=$ASAPO_USER \ + -e ACL_ENABLED=true \ + -v $DOCKER_TLS_CA:/etc/nomad/ca.pem \ + -v $DOCKER_TLS_KEY:/etc/nomad/key.pem \ + -v $DOCKER_TLS_CERT:/etc/nomad/cert.pem \ + -e DOCKER_ENDPOINT=$DOCKER_ENDPOINT \ + --name asapo --net=host -d yakser/asapo-cluster:22.03.0 + +sleep 15 +docker exec asapo jobs-start diff --git a/docs/site/versioned_sidebars/version-22.03.0-sidebars.json b/docs/site/versioned_sidebars/version-22.03.0-sidebars.json new file mode 100644 index 0000000000000000000000000000000000000000..dea1b7277ef853ef935de08715c934fe4698163f --- /dev/null +++ b/docs/site/versioned_sidebars/version-22.03.0-sidebars.json @@ -0,0 +1,93 @@ +{ + "version-22.03.0/docs": [ + { + "type": "doc", + "id": "version-22.03.0/getting-started" + }, + { + "type": "doc", + "id": "version-22.03.0/overview" + }, + { + "type": "doc", + "id": "version-22.03.0/compare-to-others" + }, + { + "collapsed": true, + "type": "category", + "label": "Concepts And Architecture", + "items": [ + { + "type": "doc", + "id": "version-22.03.0/data-in-asapo" + }, + { + "type": "doc", + "id": "version-22.03.0/producer-clients" + }, + { + "type": "doc", + "id": "version-22.03.0/consumer-clients" + }, + { + "type": "doc", + "id": "version-22.03.0/core-architecture" + } + ] + }, + { + "collapsed": true, + "type": "category", + "label": "Use Cases", + "items": [ + { + "type": "doc", + "id": "version-22.03.0/p02.1" + } + ] + }, + { + "collapsed": true, + "type": "category", + "label": "Code Examples", + "items": [ + { + "type": "doc", + "id": "version-22.03.0/cookbook/overview" + }, + { + "type": "doc", + "id": "version-22.03.0/cookbook/simple-producer" + }, + { + "type": "doc", + "id": "version-22.03.0/cookbook/simple-consumer" + }, + { + "type": "doc", + "id": "version-22.03.0/cookbook/simple-pipeline" + }, + { + "type": "doc", + "id": "version-22.03.0/cookbook/datasets" + }, + { + "type": "doc", + "id": "version-22.03.0/cookbook/acknowledgements" + }, + { + "type": "doc", + "id": "version-22.03.0/cookbook/metadata" + }, + { + "type": "doc", + "id": "version-22.03.0/cookbook/next_stream" + }, + { + "type": "doc", + "id": "version-22.03.0/cookbook/query" + } + ] + } + ] +} diff --git a/docs/site/versions.json b/docs/site/versions.json index e4f75c466b8295b10f918a5fa14d42e0fc499519..1f77ee6888be0d175e204c40c77b0853113ea7c1 100644 --- a/docs/site/versions.json +++ b/docs/site/versions.json @@ -1,4 +1,5 @@ [ + "22.03.0", "21.12.0", "21.09.0", "21.06.0"