Skip to content
Snippets Groups Projects
Commit 56facdbe authored by Sergey Yakubov's avatar Sergey Yakubov
Browse files

Merge pull request #162 in ASAPO/asapo from docs to develop

* commit 'c36e1436':
  fix cmake
  fix typo
  fix site build
  fix site build
  fix version number
  added versioning for site
  fix site build
  add code examples
  update docs
parents 9842cfac c36e1436
No related branches found
No related tags found
No related merge requests found
Showing
with 517 additions and 268 deletions
...@@ -3,7 +3,7 @@ function(cleanup varname) ...@@ -3,7 +3,7 @@ function(cleanup varname)
SET( ${varname} ${out} PARENT_SCOPE) SET( ${varname} ${out} PARENT_SCOPE)
endfunction() endfunction()
execute_process(COMMAND git describe --tags --abbrev=0 execute_process(COMMAND git describe --tags --abbrev=0
OUTPUT_VARIABLE ASAPO_TAG OUTPUT_VARIABLE ASAPO_TAG
WORKING_DIRECTORY ..) WORKING_DIRECTORY ..)
string(STRIP ${ASAPO_TAG} ASAPO_TAG) string(STRIP ${ASAPO_TAG} ASAPO_TAG)
...@@ -20,6 +20,7 @@ execute_process(COMMAND git rev-parse --short=10 HEAD ...@@ -20,6 +20,7 @@ execute_process(COMMAND git rev-parse --short=10 HEAD
string(STRIP ${ASAPO_VERSION_COMMIT} ASAPO_VERSION_COMMIT) string(STRIP ${ASAPO_VERSION_COMMIT} ASAPO_VERSION_COMMIT)
if (${BRANCH} STREQUAL "master") if (${BRANCH} STREQUAL "master")
SET (ASAPO_VERSION_IN_DOCS ${ASAPO_TAG})
SET (ASAPO_VERSION ${ASAPO_TAG}) SET (ASAPO_VERSION ${ASAPO_TAG})
SET (ASAPO_VERSION_COMMIT "") SET (ASAPO_VERSION_COMMIT "")
SET (ASAPO_VERSION_DOCKER_SUFFIX "") SET (ASAPO_VERSION_DOCKER_SUFFIX "")
...@@ -37,6 +38,10 @@ else() ...@@ -37,6 +38,10 @@ else()
SET (ASAPO_WHEEL_VERSION ${ASAPO_VERSION}) SET (ASAPO_WHEEL_VERSION ${ASAPO_VERSION})
endif() endif()
string(REGEX REPLACE "\\.0([0-9]+)\\."
".\\1." ASAPO_WHEEL_VERSION_IN_DOCS
${ASAPO_VERSION_IN_DOCS})
message("Asapo Version: " ${ASAPO_VERSION}) message("Asapo Version: " ${ASAPO_VERSION})
message("Python Asapo Version: " ${PYTHON_ASAPO_VERSION}) message("Python Asapo Version: " ${PYTHON_ASAPO_VERSION})
message("Asapo commit: " ${ASAPO_VERSION_COMMIT}) message("Asapo commit: " ${ASAPO_VERSION_COMMIT})
...@@ -3,6 +3,10 @@ project(ASAPO) ...@@ -3,6 +3,10 @@ project(ASAPO)
set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/CMakeModules/ ${PROJECT_SOURCE_DIR}/CMakeIncludes/) set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/CMakeModules/ ${PROJECT_SOURCE_DIR}/CMakeIncludes/)
set (ASAPO_VERSION_IN_DOCS 21.06.0) # is overwritten in master branch
set (ASAPO_EXAMPLES_DIR .)
#set (ASAPO_EXAMPLES_DIR ./frozen_versions/${ASAPO_VERSION_IN_DOCS})
#protocol version changes if one of the microservice API's change #protocol version changes if one of the microservice API's change
set (ASAPO_CONSUMER_PROTOCOL "v0.4") set (ASAPO_CONSUMER_PROTOCOL "v0.4")
set (ASAPO_PRODUCER_PROTOCOL "v0.3") set (ASAPO_PRODUCER_PROTOCOL "v0.3")
......
...@@ -2,9 +2,18 @@ configure_files(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR} true) ...@@ -2,9 +2,18 @@ configure_files(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR} true)
configure_files(${CMAKE_CURRENT_SOURCE_DIR}/blog ${CMAKE_CURRENT_BINARY_DIR}/blog) configure_files(${CMAKE_CURRENT_SOURCE_DIR}/blog ${CMAKE_CURRENT_BINARY_DIR}/blog)
configure_files(${CMAKE_CURRENT_SOURCE_DIR}/docs ${CMAKE_CURRENT_BINARY_DIR}/docs) configure_files(${CMAKE_CURRENT_SOURCE_DIR}/docs ${CMAKE_CURRENT_BINARY_DIR}/docs)
file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/static DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/static DESTINATION ${CMAKE_CURRENT_BINARY_DIR})
file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/versioned_docs DESTINATION ${CMAKE_CURRENT_BINARY_DIR})
file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/versioned_sidebars DESTINATION ${CMAKE_CURRENT_BINARY_DIR})
file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/src DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/src DESTINATION ${CMAKE_CURRENT_BINARY_DIR})
file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/plugins DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/plugins DESTINATION ${CMAKE_CURRENT_BINARY_DIR})
file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/examples DESTINATION ${CMAKE_CURRENT_BINARY_DIR})
configure_files(${CMAKE_CURRENT_SOURCE_DIR}/examples/getting_started ${CMAKE_CURRENT_BINARY_DIR}/examples/getting_started)
add_custom_target( add_custom_target(
site ALL site ALL
COMMAND COMMAND
......
---
title: Comparison to Other Solutions
---
Here we consider how ASAPO is different from other workflows practiced at DESY. The possible candidates are:
### Filesystem
Probably the most often used approach for now. Files are written to the beamline filesystem directly via NFS/SMB mount or by HiDRA and copied to the core filesystem by a copy daemon. A user (software) then reads the files from the filesystem.
### Filesystem + Kafka
Previous workflow + there is a Kafka instance that produces messages when a file appears in the core filesystem. These messages can then be consumed by user software.
### HiDRA
HiDRA can work in two modes - wether data is transferred via it or data is written over NFS/SMB mounts and HiDRA monitors a folder in a beamline filesystem. In both case one can subscribe to HiDRA's data queue to be informed about a new file.
### ASAPO
ASAPO does not work with files, rather with data streams. Well, behind the scene it does use files, but in principle what a user see is a stream of messages, where a message typically consists of metadata and data blobs. Data blob can be a single image, a file with arbitrary content or whatever else, even null. Important is - what goes to one end, appears on the other. And that each message must have a consecutive index. These messages must be ingested to an ASAPO data stream in some way (e.g. using ASAPO Producer API or HiDRA) and data, if not yet, will be transferred and stored in the data center. A user can then read the messages from the stream and process it in a way he likes.
### Compare by categories
In the table below we compare the approaches from different points of view and in the [next table](#compare-by-features) we compare the approaches by available features.
| Category | Filesystem | Filesystem+Kafka | HiDRA | ASAPO |
|----------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| Data ingest | traditional way - write to disk | same as Filesystem, additionally a message is send to a Kafka topic with basic file metadata (name, size, creation date, ...) | outperforms data ingest via NFS/SMB, uses ZeroMQ(TCP), saturates 10GE bandwidth. To be tested with 100GE. Same metadata as with Kafka | uses parallel TCP connections & in the future RDMA. Can add arbitrary metadata (JSON format). Can send data to various streams to create arbitrary data hierarchy (e.g. stream per scan). Saves data to a memory buffer and on disk. |
| Offline analysis | traditional way - read from disk. Need to know the filename to read, usually not a problem. | same as Filesystem, there is no benefits reading Kafka queue after all data is arrived | not possible, need to fallback to Filesystem | same as online analysis (see below) |
| Online analysis | no efficient way to recognise that new data is available - periodically read folder content and compare with previous state?, periodically check file appeared? | using a subscription to a "new files" topic, a user software can be made aware of new data quite soon and react correspondingly. | one can subscribe to a HiDRA's ZeroMQ stream and consume data. If data arrives faster than it can be processed or e.g. user software crashes - data might be skipped. | one can get data from various streams in different ways - get next unprocessed message ordered by index, get last, get by id. Since everything is stored in persistent storage, processing is possible with arbitrary slow (but also fast) consumers. Resilient to connections loss or consumer crashes. |
| Performance | as good as read/write to disk. Can be an issue, especially with new detectors 100GE+ networks. | as good as read/write to disk + some latency for the file to be written to the beamline filesystem, copied to the core filesystem and a message to go through Kafka. | data is available as soon as it is received by HiDRA. If multiple consumer groups want to read same data (one consumer is known - the process that writes the file), data will be transferred multiple times, which influences the network performance. | data is available to be consumed as soon as it is arrived and saved to beamline filesystem (later can be optimised by using persistent memory instead). No need to read from disk since it also remains in memory buffer. |
| Parallelisation | Parallelisation is easily possible e.g. with an MPI library. | Parallelisation is possible if Kafka's topics are partitioned (which is not the case in the moment) | Not out of the box, possible with some changes from user's side | Parallelisation is easily possible, one can consume data concurrently with different consumers from the same stream. Normally, synchronisation between consumers is not needed, but this might depend on a use case. When configured, data can be resent if not acknowledged during a specified time period. |
| Search/filter | hardly possible, manually parsing some metadata file, using POSIX commands? | same as Filesystem. There is Kafka SQL query language which could be used if there would be metadata in messages, which is not the case (yet?). | not possible | one can use a set of SQL queries to ingested metadata. |
| General comments | Might be ok for slow detectors or/and a use case without online analysis requirements. Might be the only way to work with legacy applications | Fits well for the cases where a software just need a trigger that some new data has arrived, when processing order, extra metadata, parallelisation is not that important or implemented by other means. Some delay between an image is generated and the event is emitted by Kafka is there, but probably not that significant (maximum a couple of seconds). Might be not that appropriate for very fast detectors since still using filesystem to write/read data. | Works quite well to transfer files from detector to the data center. Also a good candidate for live viewers, where the last available "image" should be displayed. Does not work for offline analysis or for near real-time analysis where image processing can take longer than image taking. | Tries to be a general solution which improves in areas where other approaches not suffice: single code for offline/near real-time/online analysis, parallelisation, extended metadata, efficient memory/storage management, getting data without access to filesystem (e.g. from detector pc without core filesystem mounted), computational pipelines, ... Everything has its own price: user software must be modified to use ASAPO, a wrapper might be needed for legacy software that cannot be modified, user/beamtime scientist should better structure the data - e.g. consecutive indexes must be available for each image, one has to define to which stream write/read data, what is the format of the data, ... |
### Compare by features
| Feature | Filesystem | Filesystem+Kafka | HiDRA | ASAPO |
|--------------------------------------------------------------------------------------------------------------------|------------------------------------|------------------------------------|------------------------------------|------------------------------------|
| send metadata with image | No | No | No | Yes |
| get last image | No | No | Yes | Yes |
| get image by id | No | No | No | Yes |
| get image in order | No | No | No | Yes |
| Immediately get informed that a new image is arrived | No | Yes | Yes | Yes |
| access image remotely, without reading filesystem | No | No | Yes, if it is still in buffer | Yes |
| access past images | Yes | Yes | No | Yes |
| need to change user code | No | Yes | Yes | Yes |
| parallelisation | Yes (if user software allows that) | Not out of the box | Not out of the box | Yes |
| legacy applications | Yes | No (wrapper could be a workaround) | No (wrapper could be a workaround) | No (wrapper could be a workaround) |
| transparent restart/continuation of simulations in case e.g. worker process crashes, also for parallel simulations | Not out of the box | Yes | No | Yes |
\ No newline at end of file
---
title: Consumer Clients
---
Consumer client (or consumer) is a part of a distributed streaming system that is responsible for processing streams of data that were created by producer. It is usually a user (beamline scientist, detector developer, physicist, ... ) responsibility to develop a client for specific beamline, detector or experiment using ASAPO Consumer API and ASAPO responsibility to make sure data is delivered to consumers in an efficient and reliable way.
![Docusaurus](/img/consumer-clients.png)
Consumer API is available for C++ and Python and has the following main functionality:
- Create a consumer instance and bind it to a specific beamtime and data source
- multiple instances can be created (also within a single application) to receive data from different sources
- a beamtime token is used for access control
- If needed (mainly for get_next_XX operations), create a consumer group that allows to process messages independently from other groups
- Receive messages from a specific stream (you can read more [here](data-in-asapo) about data in ASAPO)
- GetNext to receive process messages one after another without need to know message indexes
- Consumer API returns a message with index 1, then 2, ... as they were set by producer.
- This also works in parallel so that payload is distributed within multiple consumers within same consumer group or between threads of a single consumer instance. In parallel case order of indexes of the messages is not determined.
- GetLast to receive last available message - for e.g. live visualisation
- GetById - get message by index - provides random access
- Make queries based on metadata contained in a message - returns all messages in a stream with specific metadata. A subset of SQL language is used
All of the above functions can return only metadata part of the message, so that an application can e.g. extract the filename and pass it to a 3rd party tool for processing. Alternative, a function may return the complete message with metadata and data so that consumer can directly process it. An access to the filesystem where data is actually stored is not required in this case.
:::note
In case of dataset family of functions, only list of dataset messages is returned, the data can be retrieved in a separate call.
:::
Please refer to [C++](http://asapo.desy.de/cpp/) and [Python](http://asapo.desy.de/python/) documentation for specific details (available from DESY intranet only).
---
title: Core Architecture
---
For those who are curious about ASAPO architecture, the diagram shows some details. Here arrows with numbers is an example of data workflow explained below.
![Docusaurus](/img/core-architecture.png)
## Data Workflow (example)
the workflow can be split into two more or less independent tasks - data ingestion and data retrieval
### Data ingestion (numbers with i on the diagram)
1i) As we [know](producer-clients.md), producer client is responsible for ingesting data in the system. Therefore the first step is to detect that the new message is available. This can be done using another tool developed at DESY named [HiDRA](https://confluence.desy.de/display/FSEC/HiDRA). This tool monitors the source of data (e.g. by monitoring a filesystem or using HTTP request or ZeroMQ streams, depending on detector type)
2i) HiDRA (or other user application) then uses ASAPO Producer API to send messages (M1 and M2 in our case) in parallel to ASAPO Receiver. TCP/IP or RDMA protocols are used to send data most efficiently. ASAPO Receiver receives data in a memory cache
3i) - 4i) ASAPO saves data to a filesystem and adds a metadata record to a database
5i) A feedback is send to the producer client with success or error message (in case of error, some of the step above may not happen)
### Data retrieval (numbers with r on the diagram)
[Consumer client](consumer-clients.md)) is usually a user application that retrieves data from the system to analyse/process it.
The first step to retrieve a message via Consumer API is to pass the request to the Data Broker (1r). The Data Broker retrieves the metadata information about the message from the database (2r) and returns it to the Consumer Client. The Consumer Client analyses the metadata information and decides how to get the data. It the data is still in the Receiver memory cache, the client requests data from there via a Data Server (which is a part of ASAPO Receiver). Otherwise, client gets the data from the filesystem - directly if the filesystem is accessible on the machine where the client is running or via File Transfer Service if not.
---
title: Create a Blog Post
---
This page will help you on how to create blog posts in Docusaurus.
## Create a Blog Post
Create a file at `blog/2021-02-28-greetings.md`:
```md title="blog/2021-02-28-greetings.md"
---
title: Greetings!
author: Steven Hansel
author_title: Docusaurus Contributor
author_url: https://github.com/ShinteiMai
author_image_url: https://github.com/ShinteiMai.png
---
Congratulations, you have made your first post!
Feel free to play around and edit this post as much you like.
```
A new blog post is now available at `http://localhost:3000/blog/greetings`.
---
title: Create a Document
---
Documents are pages with a **sidebar**, a **previous/next navigation** and many other useful features.
## Create a Document
Create a markdown file at `docs/my-doc.md`:
```mdx title="docs/hello.md"
---
title: Hello, World!
---
## Hello, World!
This is your first document in **Docusaurus**, Congratulations!
```
A new document is now available at `http://localhost:3000/docs/hello`.
## Add your document to the sidebar
Add `hello` to the `sidebars.js` file:
```diff title="sidebars.js"
module.exports = {
docs: [
{
type: 'category',
label: 'Docusaurus Tutorial',
- items: ['getting-started', 'create-a-doc', ...],
+ items: ['getting-started', 'create-a-doc', 'hello', ...],
},
],
};
```
---
title: Create a Page
---
Any React or Markdown file created under `src/pages` directory is converted into a website page:
- `src/pages/index.js` -> `localhost:3000/`
- `src/pages/foo.md` -> `localhost:3000/foo`
- `src/pages/foo/bar.js` -> `localhost:3000/foo/bar`
## Create a React Page
Create a file at `src/pages/my-react-page.js`:
```jsx title="src/pages/my-react-page.js"
import React from 'react';
import Layout from '@theme/Layout';
function HelloWorld() {
return (
<Layout>
<h1>My React page</h1>
<p>This is a React page</p>
</Layout>
);
}
```
A new page is now available at `http://localhost:3000/my-react-page`.
## Create a Markdown Page
Create a file at `src/pages/my-markdown-page.md`:
```mdx title="src/pages/my-markdown-page.md"
---
title: My Markdown page
---
# My Markdown page
This is a Markdown page
```
A new page is now available at `http://localhost:3000/my-markdown-page`.
---
title: Data in ASAPO
---
All data that is produced, stored and consumed via ASAPO is structured on several levels.
#### Beamtime
This is the top level. Contains all data collected/produced during a single beamtime (Beamtime is the term used at DESY. Can also be Run, Experiment, Proposal, ...). Each beamtime has its own unique ID.
#### Data Source
During a beamtime, data can be produced by different sources. For example, a detector is a data source, if multiple detectors are used during an experiment, they can be different data sources or the same data source (more details below in datasets section). A user application that simulates or analyses data can also act as an ASAPO data source. Each data source has its own unique name within a beamtime.
#### Data Stream
Each data source can emit multiple data streams. Each stream has a unique within a specific data source name.
#### Message
Data streams consist of smaller entities - messages. The content of a message is quite flexible, to be able to cover a broad amount of use cases. Usually it is a metadata and some binary data (e.g. a detector image, or an hdf5 file with multiple images). At the moment ASAPO itself is agnostic to the data and sees it as a binary array. Later some specific cases might be handled as well (the most prominent use case - an hdf5 file with multiple images).
An important aspect is that each message within a data stream must be assigned a consecutive integer index. Therefore, a streams always contain messages with index = 1,2,3 ... . This is different to traditional messaging systems where messages have timestamps or arbitrary unique hash IDs. The reason is that with timestamps the order of messages saved in the system might differ from the order the were generated by the data source (e.g. detector). And keeping correct order is required in many cases. Second reason is that it makes a random access to a specific message quite straightforward.
#### Datasets/Dataset substreams
In some cases multiple detectors are using during an experiment. E.g. a 3D image is composed from multiple 2D images created by different detectors. In this case these 2D images can be composed to a dataset so that it a be processed later as a whole. One would then use a single data source (which would mean a set of detectors or "multi-detector" in this case), single data stream and, to compose a dataset, for each of it's components (each 2D image in our example) the corresponding detector would send a message with same id but to a different dataset substream.
So, for the case without datasets (single detector) the data hierarchy is Beamtime→Data Source → Data Stream → Message:
![Docusaurus](/img/data-in-asapo-workflow.png)
And with datasets (multi-detector) the data hierarchy is Beamtime→Data Source → Data Stream → Dataset→ Message in Dataset Substream:
![Docusaurus](/img/data-in-asapo-workflow2.png)
...@@ -14,10 +14,10 @@ If you already have running ASAPO services and know the endpoint, you can skip t ...@@ -14,10 +14,10 @@ If you already have running ASAPO services and know the endpoint, you can skip t
Otherwise, for testing purposes one can start ASAPO services in a standalone mode (this is not recommended for production deployment). Otherwise, for testing purposes one can start ASAPO services in a standalone mode (this is not recommended for production deployment).
The easiest way is to use a Docker container. The easiest way is to use a Docker container.
So, make sure Docker is installed and you have necessary permissions to use it. So, make sure Docker is installed and you have necessary permissions to use it.
Please note that this will only work on a Linux machine. Also please note that ASAPO needs some ports to be available. You can check the list Please note that this will only work on a Linux machine. Also please note that ASAPO needs some ports to be available. You can check the list
[here](https://stash.desy.de/projects/ASAPO/repos/asapo/browse/deploy/asapo_services/scripts/asapo.auto.tfvars.in?at={ASAPO_VERSION}#37). [here](https://stash.desy.de/projects/ASAPO/repos/asapo/browse/deploy/asapo_services/scripts/asapo.auto.tfvars.in?at=@ASAPO_VERSION_IN_DOCS@#37).
Now, depending on how your Docker daemon is configured (if it uses a Now, depending on how your Docker daemon is configured (if it uses a
unix socket or a tcp port for communications) unix socket or a tcp port for communications)
...@@ -32,14 +32,14 @@ unix socket or a tcp port for communications) ...@@ -32,14 +32,14 @@ unix socket or a tcp port for communications)
}> }>
<TabItem value="unix"> <TabItem value="unix">
```shell content="getting_started/start_asapo_socket.sh" ```shell content="@ASAPO_EXAMPLES_DIR@/getting_started/start_asapo_socket.sh"
``` ```
</TabItem> </TabItem>
<TabItem value="tcp"> <TabItem value="tcp">
```shell content="getting_started/start_asapo_tcp.sh" ```shell content="@ASAPO_EXAMPLES_DIR@/getting_started/start_asapo_tcp.sh"
``` ```
</TabItem> </TabItem>
...@@ -64,9 +64,9 @@ mkdir -p $ASAPO_HOST_DIR/global_shared/online_data/test/current/raw ...@@ -64,9 +64,9 @@ mkdir -p $ASAPO_HOST_DIR/global_shared/online_data/test/current/raw
mkdir -p $ASAPO_HOST_DIR/global_shared/data/test_facility/gpfs/test/2019/data/asapo_test mkdir -p $ASAPO_HOST_DIR/global_shared/data/test_facility/gpfs/test/2019/data/asapo_test
``` ```
:::note ASAP::O in production mode :::note ASAP::O in production mode
We have a running instance for processing data collected during beamtimes. Please get in touch with FS-SC group for more information. We have a running instance for processing data collected during experiments. Please get in touch with FS-SC group for more information.
::: :::
...@@ -84,31 +84,161 @@ Now you can install Python packages or C++ libraries for ASAPO Producer and Cons ...@@ -84,31 +84,161 @@ Now you can install Python packages or C++ libraries for ASAPO Producer and Cons
}> }>
<TabItem value="python-pip"> <TabItem value="python-pip">
```shell content="getting_started/install_python_clients_pip.sh" snippetTag="#snippet1" ```shell content="@ASAPO_EXAMPLES_DIR@/getting_started/install_python_clients_pip.sh" snippetTag="#snippet1"
``` ```
</TabItem> </TabItem>
<TabItem value="python-packages"> <TabItem value="python-packages">
```shell content="getting_started/install_python_clients_pkg.sh" ```shell content="@ASAPO_EXAMPLES_DIR@/getting_started/install_python_clients_pkg.sh"
``` ```
</TabItem> </TabItem>
<TabItem value="cpp"> <TabItem value="cpp">
```shell content="getting_started/install_cpp_clients.sh" ```shell content="@ASAPO_EXAMPLES_DIR@/getting_started/install_cpp_clients.sh"
``` ```
</TabItem> </TabItem>
</Tabs> </Tabs>
## Step 3: Produce a message {#step-3}
Run the development server in the newly created `my-website` folder: <Tabs
groupId="language"
defaultValue="python"
values={[
{ label: 'Python', value: 'python', },
{ label: 'C++', value: 'cpp', },
]
}>
<TabItem value="python">
Open `docs/getting-started.md` and edit some lines. The site reloads automatically and display your changes. Now you can write a Producer client (API documentation [here](http://asapo.desy.de/python/producer.html)).
```shell content="@ASAPO_EXAMPLES_DIR@/getting_started/python/produce.py"
```
Execute it with python3
```
$ python3 produce.py
```
</TabItem>
<TabItem value="cpp">
Now you can write a Producer client (API documentation [here](http://asapo.desy.de/cpp/producer)).
```shell content="@ASAPO_EXAMPLES_DIR@/getting_started/cpp/produce.cpp"
```
Compile e.g. using CMake and execute. You might need to point cmake (with CMAKE_PREFIX_PATH) to asapo installation and curl library if installed to non-standard location.
```shell content="@ASAPO_EXAMPLES_DIR@/getting_started/cpp/CMakeLists.txt" snippetTag="#producer"
```
```
$ cmake . && make
$ ./asapo-produce
```
</TabItem>
</Tabs>
the output should look like
```
{"time":"***** *****","source":"producer_api","level":"info","message":"authorized connection to receiver at ****:****"}
successfuly sent: {"id": 1, "buffer": "test_file"}
```
## Step 3: Produce a message {#step-3}
## Step 4: Consume a message {#step-4} ## Step 4: Consume a message {#step-4}
A consumer data that reads the message ingested during step 3. Note that a token is needed to work with data. In production, the token is provided during start of the beamtime.
<Tabs
groupId="language"
defaultValue="python"
values={[
{ label: 'Python', value: 'python', },
{ label: 'C++', value: 'cpp', },
{ label: 'C', value: 'c', },
]
}>
<TabItem value="python">
Complete API documentation [here](http://asapo.desy.de/python/consumer.html)
```shell content="@ASAPO_EXAMPLES_DIR@/getting_started/python/consume.py"
```
Execute it with python3
```
$ python3 consumer.py
```
</TabItem>
<TabItem value="cpp">
```shell content="@ASAPO_EXAMPLES_DIR@/getting_started/cpp/consume.cpp"
```
Compile e.g. using CMake and execute. You might need to point cmake (with CMAKE_PREFIX_PATH) to asapo installation and curl library if installed to non-standard location.
```shell content="@ASAPO_EXAMPLES_DIR@/getting_started/cpp/CMakeLists.txt" snippetTag="#consumer"
```
```
$ cmake . && make
$ ./asapo-consume
```
</TabItem>
<TabItem value="c">
```shell content="@ASAPO_EXAMPLES_DIR@/getting_started/c/consume.c"
```
Compile e.g. using Makefile and pkg-config (although we recommend CMake - see C++ section) and execute. This example assumes asapo is installed to /opt/asapo. Adjust correspondingly.
```shell content="@ASAPO_EXAMPLES_DIR@/getting_started/c/Makefile" snippetTag="#consumer"
```
```
$ make
$ ./asapo-consume
```
</TabItem>
</Tabs>
the output should look like
```
id: 1
file name: processed/test_file
file content: hello
stream deleted
```
## Step 5: Clean-up ## Step 5: Clean-up
Optionally, last step is to stop ASAPO services and remove files:
```shell content="@ASAPO_EXAMPLES_DIR@/getting_started/cleanup.sh"
```
<br/><br/>
:::tip
You can see more examples in ASAPO [source code](https://stash.desy.de/projects/ASAPO/repos/asapo/browse/examples?at=@ASAPO_VERSION_IN_DOCS@)
:::
---
title: Markdown Features
---
Docusaurus supports the [Markdown](https://daringfireball.net/projects/markdown/syntax) syntax and has some additional features.
## Front Matter
Markdown documents can have associated metadata at the top called [Front Matter](https://jekyllrb.com/docs/front-matter/):
```md
---
id: my-doc
title: My document title
description: My document description
sidebar_label: My doc
---
Markdown content
```
## Markdown links
Regular Markdown links are supported using url paths or relative file paths.
```md
Let's see how to [Create a page](/create-a-page).
```
```md
Let's see how to [Create a page](./create-a-page.md).
```
Let's see how to [Create a page](./create-a-page.md).
## Markdown images
Regular Markdown images are supported.
Add an image at `static/img/docusaurus.png` and use this Markdown declaration:
```md
![Docusaurus logo](/img/docusaurus.png)
```
![Docusaurus logo](/img/docusaurus.png)
## Code Blocks
Markdown code blocks are supported with Syntax highlighting.
```jsx title="src/components/HelloDocusaurus.js"
function HelloDocusaurus() {
return (
<h1>Hello, Docusaurus!</h1>
)
}
```
```jsx title="src/components/HelloDocusaurus.js"
function HelloDocusaurus() {
return <h1>Hello, Docusaurus!</h1>;
}
```
## Admonitions
Docusaurus has a special syntax to create admonitions and callouts:
:::tip My tip
Use this awesome feature option
:::
:::danger Take care
This action is dangerous
:::
:::tip My tip
Use this awesome feature option
:::
:::danger Take care
This action is dangerous
:::
## React components
Thanks to [MDX](https://mdxjs.com/), you can make your doc more interactive and use React components inside Markdown:
```jsx
export const Highlight = ({children, color}) => (
<span
style={{
backgroundColor: color,
borderRadius: '2px',
color: 'red',
padding: '0.2rem',
}}>
{children}
</span>
);
<Highlight color="#25c2a0">Docusaurus green</Highlight> and <Highlight color="#1877F2">Facebook blue</Highlight> are my favorite colors.
```
export const Highlight = ({children, color}) => (
<span
style={{
backgroundColor: color,
borderRadius: '2px',
color: '#fff',
padding: '0.2rem',
}}>
{children}
</span>
);
<Highlight color="#25c2a0">Docusaurus green</Highlight> and <Highlight color="#1877F2">
Facebook blue
</Highlight> are my favorite colors.
---
title: Overview
---
ASAP::O (or ASAPO) is a high performance distributed streaming platform. It is being developed at DESY and is mainly aimed to support online/offline analysis of experimental data produced at its facilities. The ideas behind are quite similar to that of Apache Kafka and similar messaging solutions, but ASAPO is developed and tuned for scientific use cases with their specific workflows and where the size of the messages is much large (MBs to GBs as compared to KBs in traditional systems).
ASAPO has the following key capabilities:
- Deliver data produced by an experimental facility (e.g. detector) to a data center in a high-performant fault-tolerant way
- Consume this data in various modes (as soon as new data occurs, random access, latest available data, in parallel, ...)
- Ingest own data/ create computational pipelines
ASAPO consists of the following three components:
- Core services (run in background on a single node or a cluster and provide ASAPO functionality)
- Producer API to ingest data into the system
- Consumer API to retrieve data from the system
### Bird's eye view
A workflow when using ASAPO can be represented as follows:
![Docusaurus](/img/asapo_bird_eye.png)
Usually, an end user can see ASAPO core services as a black box. But some more details are given [here](core-architecture).
Next, one can learn more about following concepts:
- [Data in ASAPO](data-in-asapo)
- [Producer clients](producer-clients)
- [Consumer clients](consumer-clients)
You can also compare with other solutions, jump directly to [Getting Started](getting-started.mdx) or have a look in use cases section
---
title: ASAP::O at P02.1
---
Online analysis at P02.1 has two main goals:
- Doing as much beamline specific data analysis as possible for the user, so that they can concentrate on analyzing the experiment specific details. This will lead to a comprehensive support for the user from beamline side and therefore lead to a higher user satisfaction. Automatization of the analysis is essential to achieve the necessary high throughput, which is mandatory for current and future diffraction applications.
- Enabling timely decisions through a "live" view of raw images and analyzed data. Problems with the measurement can often be more easily detected in the analyzed data, which should be made available to the user as early as possible to avoid wasting valuable measurement time on suboptimal experimental conditions.
## Description of a typical beamtime at P02.1
- A beamtime consists of a number of scans
- Each scan consists of one or more steps
- At each step, an image is taken by the detectors, as well as several other scalar sensors values are gathered, e.g., temperature, electric current, position, etc.
- The parameters for the analysis are fixed during one scan but might need to change from one scan to the next
## Analysis Pipeline
- Images are taken by one or two detectors
- Optionally, a number of consecutive images of a single detector are merged into one averaged image to reduce the noise
- The (averaged) images are stored into one NeXus file per detector per scan
- Each (averaged) image is analyzed independently
- The analyzed data is written to one NeXus file per detector per scan
- All scalar sensor data and additional metadata is written to one NeXus file per scan that links to the other NeXus files with the (averaged) images and analyzed data
- A viewer displays the live and history output of all relevant processing steps
![Docusaurus](/img/Asapo_Analysis_Pipeline_P02-1.png)
## Use of ASAPO
In the following, ASAPO specific details for the pipeline of a single detector are given. For multiple detectors, all stream names are suffixed by the detector ID.
1. The data acquisition software stores the parameters for the analysis in a "scan-metadata" stream with one substream per scan and one metadata entry per substream
2. Images are ingested into ASAPO
3. The images taken by the detectors are written to the beamline filesystem by HiDRA (one file per image)
4. HiDRA inserts the files into ASAPO. It assigns the files to the correct "detector" stream based on the file name. Each stream uses one substream per scan, its name is also extracted from the filename by HiDRA. This applies to the index within a substream as well.
5. If enabled, one "averager" worker per detector stream reads the files from the "detector" stream and emits the averaged images into the "averaged" stream. The name of the substream of the input is used for the name of the output substream. The indices within a substream are chosen consecutively.
6. One "nexus-writer" worker per detector reads the images either from the "detector" or the "averaged" stream. All images of a single substream are stored into one file. The filename is constructed from the name of the stream and substream the image belongs to. The index within a substream corresponds to the index within the HDF5 dataset.
7. Multiple "asapo-dawn" worker read their parameters from the "scan-metadata" stream at the start of each substream. The images are read from the "detector" or "averaged" stream. The worker emit the resulting data into an "analyzed" stream with the same substream name as the input and the same index.
8. One "nexus-write" worker per detector reads the analyzed data from the "analyzed" stream and writes it into one NeXus file per substream. The filename is constructed from the name of the stream and substream the data belongs to. The index within a substream corresponds to the index within the HDF5 dataset.
9. The data acquisition software stores all scalar data and all additional scan-metadata in a master NeXus file that links to the NeXus files produced by the ASAPO workers.
10. The viewer listens to all streams and parses the metadata to create a continuously updated tree view of all available data. Clicking on an item uses get_by_id to retrieve the actual data. A "live" mode automatically retrieves the latest data.
\ No newline at end of file
---
title: Producer Clients
---
Producer client (or producer) is a part of a distributed streaming system that is responsible for creating data streams (i.e. ingesting data in the system). It is usually a user (beamline scientist, detector developer, physicist, ... ) responsibility to develop a client for specific beamline, detector or experiment using ASAPO Producer API and ASAPO responsibility to make sure data is transferred and saved an in efficient and reliable way.
![Docusaurus](/img/producer-clients.png)
Producer API is available for C++ and Python and has the following main functionality:
- Create a producer instance and bind it to a specific beamtime and data source
multiple instances can be created (also within a single application) to send data from different sources
- Send messages to a specific stream (you can read more [here](data-in-asapo) about data in ASAPO)
- each message must have a consecutive integer index, ASAPO does not create indexes automatically
- to compose datasets, dataset substream (and dataset size) should be send along with each message
- messages are sent asynchronously, in parallel using multiple threads
- retransfer will be attempted in case of system failure
- a callback function can be provided to react after data was sent/process error
Please refer to [C++](http://asapo.desy.de/cpp/) and [Python](http://asapo.desy.de/python/) documentation for specific details (available from DESY intranet only).
---
title: Thank you!
---
Congratulations on making it this far!
You have learned the **basics of Docusaurus** and made some changes to the **initial template**.
But Docusaurus has **much more to offer**!
## What's next?
- [Read the official documentation](https://v2.docusaurus.io/).
- [Design and Layout your Docusaurus site](https://v2.docusaurus.io/docs/styling-layout)
- [Integrate a search bar into your site](https://v2.docusaurus.io/docs/search)
- [Find inspirations in Docusaurus showcase](https://v2.docusaurus.io/showcase)
- [Get involved in the Docusaurus Community](https://v2.docusaurus.io/community/support)
...@@ -14,7 +14,7 @@ module.exports = { ...@@ -14,7 +14,7 @@ module.exports = {
organizationName: 'DESY', // Usually your GitHub org/user name. organizationName: 'DESY', // Usually your GitHub org/user name.
projectName: 'ASAPO', // Usually your repo name. projectName: 'ASAPO', // Usually your repo name.
customFields: { customFields: {
version: '@ASAPO_VERSION@', version: '@ASAPO_VERSION_IN_DOCS@',
}, },
plugins: [path.resolve(__dirname, 'plugins/webpackconf/src/index.js')], plugins: [path.resolve(__dirname, 'plugins/webpackconf/src/index.js')],
themeConfig: { themeConfig: {
...@@ -47,7 +47,17 @@ module.exports = { ...@@ -47,7 +47,17 @@ module.exports = {
], ],
}, },
{ {
href: 'https://stash.desy.de/projects/ASAPO/repos/asapo/browse?at=@ASAPO_VERSION@/', type: 'docsVersionDropdown',
//// Optional
position: 'right',
// Add additional dropdown items at the beginning/end of the dropdown.
dropdownItemsBefore: [],
dropdownItemsAfter: [],
dropdownActiveClassDisabled: true,
docsPluginId: 'default',
},
{
href: 'https://stash.desy.de/projects/ASAPO/repos/asapo/browse?at=@ASAPO_VERSION_IN_DOCS@/',
label: 'BitBucket', label: 'BitBucket',
title: 'BitBucket', title: 'BitBucket',
position: 'right', position: 'right',
...@@ -65,6 +75,12 @@ module.exports = { ...@@ -65,6 +75,12 @@ module.exports = {
{ {
docs: { docs: {
sidebarPath: require.resolve('./sidebars.js'), sidebarPath: require.resolve('./sidebars.js'),
versions: {
current: {
"label": "Develop",
"path": "next"
},
},
}, },
blog: { blog: {
showReadingTime: true, showReadingTime: true,
......
!Makefile
\ No newline at end of file
PROGRAM=asapo-consume
LDFLAGS = "-Wl,-rpath,/opt/asapo/lib"
CFLAGS += `PKG_CONFIG_PATH=/opt/asapo/lib/pkgconfig pkg-config --cflags libasapo-consumer`
LIBS = `PKG_CONFIG_PATH=/opt/asapo/lib/pkgconfig pkg-config --libs libasapo-consumer`
# for default installation
#LDFLAGS =
#CFLAGS += `pkg-config --cflags libasapo-consumer`
#LIBS = `pkg-config --libs libasapo-consumer`
RM=rm -f
SRCS=consume.c
OBJS=$(subst .c,.o,$(SRCS))
all: $(PROGRAM)
$(PROGRAM): $(OBJS)
$(CC) $(LDFLAGS) -o $@ $^ $(LIBS)
%.o: %.cpp
$(CC) $(CFLAGS) $(INCLUDE) -c -o $@ $<
clean:
$(RM) $(OBJS)
distclean: clean
$(RM) $(PROGRAM)
#include "asapo/consumer_c.h"
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
void exit_if_error(const char *error_string, const AsapoErrorHandle err) {
if (asapo_is_error(err)) {
char buf[1024];
asapo_error_explain(err, buf, sizeof(buf));
printf("%s %s\n", error_string, buf);
exit(EXIT_FAILURE);
}
}
int main(int argc, char* argv[]) {
AsapoErrorHandle err = asapo_new_handle();
AsapoMessageMetaHandle mm = asapo_new_handle();
AsapoMessageDataHandle data = asapo_new_handle();
const char *endpoint = "localhost:8400";
const char *beamtime = "asapo_test";
const char *token = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJleHAiOjk1NzE3MTAyMTYsImp0aSI6ImMzaXFhbGpmNDNhbGZwOHJua20wIiwic3ViIjoiYnRfYXNhcG9fdGVzdCIsIkV4dHJhQ2xhaW1zIjp7IkFjY2Vzc1R5cGVzIjpbIndyaXRlIiwicmVhZCJdfX0.dkWupPO-ysI4t-jtWiaElAzDyJF6T7hu_Wz_Au54mYU";
const char * path_to_files = "/var/tmp/asapo/global_shared/data/test_facility/gpfs/test/2019/data/asapo_test"; //set it according to your configuration.
AsapoSourceCredentialsHandle cred = asapo_create_source_credentials(kProcessed,
beamtime,
"", "test_source", token);
AsapoConsumerHandle consumer = asapo_create_consumer(endpoint,
path_to_files, 1,
cred,
&err);
asapo_free_handle(&cred);
exit_if_error("Cannot create consumer", err);
asapo_consumer_set_timeout(consumer, 5000ull);
AsapoStringHandle group_id = asapo_consumer_generate_new_group_id(consumer, &err);
exit_if_error("Cannot create group id", err);
asapo_consumer_get_next(consumer, group_id, &mm, &data, "default",&err);
exit_if_error("Cannot get next record", err);
printf("id: %llu\n", (unsigned long long)asapo_message_meta_get_id(mm));
printf("file name: %s\n", asapo_message_meta_get_name(mm));
printf("file content: %s\n", asapo_message_data_get_as_chars(data));
// delete stream
asapo_consumer_delete_stream(consumer,"default", 1,1,&err);
exit_if_error("Cannot delete stream", err);
printf("stream deleted\n");
asapo_free_handle(&err);
asapo_free_handle(&mm);
asapo_free_handle(&data);
asapo_free_handle(&consumer);
asapo_free_handle(&group_id);
return EXIT_SUCCESS;
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment