diff --git a/deploy/asapo_services/asap3.tfvars b/deploy/asapo_services/asap3.tfvars index 22b5c25334a8788c705279942108fc92b20e797d..972dafffdeb937973bdf418d21d7b038514f6236 100644 --- a/deploy/asapo_services/asap3.tfvars +++ b/deploy/asapo_services/asap3.tfvars @@ -13,6 +13,7 @@ receiver_total_memory_size = 35000 receiver_dataserver_cache_size = 30 #gb receiver_receive_to_disk_threshold = 50 # mb receiver_dataserver_nthreads = 8 +receiver_network_modes = "tcp,fabric" grafana_total_memory_size = 2000 influxdb_total_memory_size = 2000 diff --git a/deploy/asapo_services/build_image.sh b/deploy/asapo_services/build_image.sh index ddc6bd83ccdc8582c6455d0ecc91aca9ae2a6484..69b3e638d370540da56478932995c0ca570397a3 100755 --- a/deploy/asapo_services/build_image.sh +++ b/deploy/asapo_services/build_image.sh @@ -1,3 +1,4 @@ #!/usr/bin/env bash docker build -t yakser/asapo-cluster . +docker push yakser/asapo-cluster diff --git a/deploy/asapo_services/run.sh b/deploy/asapo_services/run.sh index 12e23fbd336e194172de41d6debb5dedaaebc742..0a4f38fc588f203f46a55db165818d9a4645406b 100755 --- a/deploy/asapo_services/run.sh +++ b/deploy/asapo_services/run.sh @@ -28,35 +28,35 @@ chmod 777 * mmc=`cat /proc/sys/vm/max_map_count` if (( mmc < 262144 )); then - echo increase max_map_count - needed for elasticsearch - exit 1 +echo increase max_map_count - needed for elasticsearch +exit 1 fi if [ -f $ASAPO_VAR_FILE ]; then - chmod 666 $ASAPO_VAR_FILE - MOUNT_VAR_FILE="-v $ASAPO_VAR_FILE:/var/run/asapo/user_vars.tfvars" +chmod 666 $ASAPO_VAR_FILE +MOUNT_VAR_FILE="-v $ASAPO_VAR_FILE:/var/run/asapo/user_vars.tfvars" fi docker run --privileged --rm -v /var/run/docker.sock:/var/run/docker.sock \ - -u $ASAPO_USER \ - --group-add `getent group docker | cut -d: -f3` \ - -v /var/lib/docker:/var/lib/docker \ - -v $NOMAD_ALLOC_HOST_SHARED:$NOMAD_ALLOC_HOST_SHARED \ - -v $SERVICE_DATA_CLUSTER_SHARED:$SERVICE_DATA_CLUSTER_SHARED \ - -v $DATA_GLOBAL_SHARED:$DATA_GLOBAL_SHARED \ - -e NOMAD_ALLOC_DIR=$NOMAD_ALLOC_HOST_SHARED \ - -e TF_VAR_service_dir=$SERVICE_DATA_CLUSTER_SHARED \ - -e TF_VAR_online_dir=$DATA_GLOBAL_SHARED_ONLINE \ - -e TF_VAR_offline_dir=$DATA_GLOBAL_SHARED \ - -e TF_VAR_mongo_dir=$MONGO_DIR \ - $MOUNT_VAR_FILE \ - -e ADVERTISE_IP=$ADVERTISE_IP \ - -e RECURSORS=$RECURSORS \ - -e TF_VAR_asapo_user=$ASAPO_USER \ - -e IB_ADDRESS=$IB_ADDRESS \ - -e ACL_ENABLED=$ACL_ENABLED \ - -e SERVER_ADRESSES=$SERVER_ADRESSES \ - -e N_SERVERS=$N_SERVERS \ - --name asapo --net=host -d yakser/asapo-cluster +-u $ASAPO_USER \ +--group-add `getent group docker | cut -d: -f3` \ +-v /var/lib/docker:/var/lib/docker \ +-v $NOMAD_ALLOC_HOST_SHARED:$NOMAD_ALLOC_HOST_SHARED \ +-v $SERVICE_DATA_CLUSTER_SHARED:$SERVICE_DATA_CLUSTER_SHARED \ +-v $DATA_GLOBAL_SHARED:$DATA_GLOBAL_SHARED \ +-e NOMAD_ALLOC_DIR=$NOMAD_ALLOC_HOST_SHARED \ +-e TF_VAR_service_dir=$SERVICE_DATA_CLUSTER_SHARED \ +-e TF_VAR_online_dir=$DATA_GLOBAL_SHARED_ONLINE \ +-e TF_VAR_offline_dir=$DATA_GLOBAL_SHARED \ +-e TF_VAR_mongo_dir=$MONGO_DIR \ + $MOUNT_VAR_FILE \ +-e ADVERTISE_IP=$ADVERTISE_IP \ +-e RECURSORS=$RECURSORS \ +-e TF_VAR_asapo_user=$ASAPO_USER \ +-e IB_ADDRESS=$IB_ADDRESS \ +-e ACL_ENABLED=$ACL_ENABLED \ +-e SERVER_ADRESSES=$SERVER_ADRESSES \ +-e N_SERVERS=$N_SERVERS \ +--name asapo --net=host -d yakser/asapo-cluster diff --git a/deploy/asapo_services/run_maxwell.sh b/deploy/asapo_services/run_maxwell.sh index cdba4bda248f91270bf7b6f713936a65daad660b..e5271d009987c83a5955165daac4f1d00b22800f 100755 --- a/deploy/asapo_services/run_maxwell.sh +++ b/deploy/asapo_services/run_maxwell.sh @@ -28,8 +28,8 @@ DOCKER_TLS_CERT=/data/netapp/docker/certs/$USER/cert.pem #adresses to use USE_IB_FOR_RECEIVER=true if [ "$USE_IB_FOR_RECEIVER" == "true" ]; then - IB_HOSTNAME=`hostname --short`-ib - IB_ADDRESS=`getent hosts $IB_HOSTNAME | awk '{ print $1 }'` +IB_HOSTNAME=`hostname --short`-ib +IB_ADDRESS=`getent hosts $IB_HOSTNAME | awk '{ print $1 }'` fi #ADVERTISE_IP= #set if differs from default @@ -49,7 +49,7 @@ chmod 777 * #todo: elastic search check mmc=`cat /proc/sys/vm/max_map_count` if (( mmc < 262144 )); then - echo consider increasing max_map_count - needed for elasticsearch +echo consider increasing max_map_count - needed for elasticsearch # exit 1 fi @@ -58,32 +58,32 @@ docker rm -f asapo docker pull yakser/asapo-cluster if [ -f $ASAPO_VAR_FILE ]; then - MOUNT_VAR_FILE="-v $ASAPO_VAR_FILE:/var/run/asapo/user_vars.tfvars" +MOUNT_VAR_FILE="-v $ASAPO_VAR_FILE:/var/run/asapo/user_vars.tfvars" fi dockerrun --rm \ - -u $ASAPO_USER \ - -v /scratch/docker/100000.100000:/scratch/docker/100000.100000 \ - -v $NOMAD_ALLOC_HOST_SHARED:$NOMAD_ALLOC_HOST_SHARED \ - -v $SERVICE_DATA_CLUSTER_SHARED:$SERVICE_DATA_CLUSTER_SHARED \ - -v $DOCKER_TLS_CA:/etc/nomad/ca.pem \ - -v $DOCKER_TLS_KEY:/etc/nomad/key.pem \ - -v $DOCKER_TLS_CERT:/etc/nomad/cert.pem \ - -v $DATA_GLOBAL_SHARED:$DATA_GLOBAL_SHARED \ - $MOUNT_VAR_FILE \ - -e NOMAD_ALLOC_DIR=$NOMAD_ALLOC_HOST_SHARED \ - -e TF_VAR_service_dir=$SERVICE_DATA_CLUSTER_SHARED \ - -e TF_VAR_online_dir=$DATA_GLOBAL_SHARED_ONLINE \ - -e TF_VAR_offline_dir=$DATA_GLOBAL_SHARED \ - -e TF_VAR_mongo_dir=$MONGO_DIR \ - -e ADVERTISE_IP=$ADVERTISE_IP \ - -e RECURSORS=$RECURSORS \ - -e TF_VAR_asapo_user=$ASAPO_USER \ - -e IB_ADDRESS=$IB_ADDRESS \ - -e ACL_ENABLED=$ACL_ENABLED \ - -e SERVER_ADRESSES=$SERVER_ADRESSES \ - -e ASAPO_LIGHTWEIGHT_SERVICE_NODES=$ASAPO_LIGHTWEIGHT_SERVICE_NODES \ - -e DOCKER_ENDPOINT=$DOCKER_ENDPOINT \ - -e N_SERVERS=$N_SERVERS \ - --name asapo yakser/asapo-cluster +-u $ASAPO_USER \ +-v /scratch/docker/100000.100000:/scratch/docker/100000.100000 \ +-v $NOMAD_ALLOC_HOST_SHARED:$NOMAD_ALLOC_HOST_SHARED \ +-v $SERVICE_DATA_CLUSTER_SHARED:$SERVICE_DATA_CLUSTER_SHARED \ +-v $DOCKER_TLS_CA:/etc/nomad/ca.pem \ +-v $DOCKER_TLS_KEY:/etc/nomad/key.pem \ +-v $DOCKER_TLS_CERT:/etc/nomad/cert.pem \ +-v $DATA_GLOBAL_SHARED:$DATA_GLOBAL_SHARED \ +$MOUNT_VAR_FILE \ +-e NOMAD_ALLOC_DIR=$NOMAD_ALLOC_HOST_SHARED \ +-e TF_VAR_service_dir=$SERVICE_DATA_CLUSTER_SHARED \ +-e TF_VAR_online_dir=$DATA_GLOBAL_SHARED_ONLINE \ +-e TF_VAR_offline_dir=$DATA_GLOBAL_SHARED \ +-e TF_VAR_mongo_dir=$MONGO_DIR \ +-e ADVERTISE_IP=$ADVERTISE_IP \ +-e RECURSORS=$RECURSORS \ +-e TF_VAR_asapo_user=$ASAPO_USER \ +-e IB_ADDRESS=$IB_ADDRESS \ +-e ACL_ENABLED=$ACL_ENABLED \ +-e SERVER_ADRESSES=$SERVER_ADRESSES \ +-e ASAPO_LIGHTWEIGHT_SERVICE_NODES=$ASAPO_LIGHTWEIGHT_SERVICE_NODES \ +-e DOCKER_ENDPOINT=$DOCKER_ENDPOINT \ +-e N_SERVERS=$N_SERVERS \ +--name asapo yakser/asapo-cluster diff --git a/deploy/asapo_services/scripts/asapo-receivers.nmd.tpl b/deploy/asapo_services/scripts/asapo-receivers.nmd.tpl index efe823c70e6294c890b7293cfd621a547f3c0758..f0aa1a79ad38078764ba6c1112ce7284be404bfc 100644 --- a/deploy/asapo_services/scripts/asapo-receivers.nmd.tpl +++ b/deploy/asapo_services/scripts/asapo-receivers.nmd.tpl @@ -30,6 +30,7 @@ job "asapo-receivers" { network_mode = "host" security_opt = ["no-new-privileges"] userns_mode = "host" + privileged = true image = "yakser/asapo-receiver${image_suffix}" force_pull = true volumes = ["local/config.json:/var/lib/receiver/config.json", @@ -79,11 +80,11 @@ job "asapo-receivers" { receiver_dataserver_cache_size = "${receiver_dataserver_cache_size}" receiver_dataserver_nthreads = "${receiver_dataserver_nthreads}" receiver_receive_to_disk_threshold = "${receiver_receive_to_disk_threshold}" + receiver_network_modes = "${receiver_network_modes}" } - template { - source = "${scripts_dir}/receiver_tcp.json.tpl" + source = "${scripts_dir}/receiver.json.tpl" destination = "local/config.json" change_mode = "restart" } diff --git a/deploy/asapo_services/scripts/asapo.auto.tfvars.in b/deploy/asapo_services/scripts/asapo.auto.tfvars.in index 7c7c153422e7a5154dd812716d13409f75041fa9..2212ce1f349579fc874591fbf5884a5db746269b 100644 --- a/deploy/asapo_services/scripts/asapo.auto.tfvars.in +++ b/deploy/asapo_services/scripts/asapo.auto.tfvars.in @@ -18,6 +18,7 @@ receiver_total_memory_size = "2000" receiver_dataserver_cache_size = "1" #gb receiver_dataserver_nthreads = 4 receiver_receive_to_disk_threshold = 50 #mb +receiver_network_modes = "tcp" grafana_total_memory_size = "256" diff --git a/deploy/asapo_services/scripts/receiver.json.tpl b/deploy/asapo_services/scripts/receiver.json.tpl index 3216854994de0dc9698bfefc78d6e2c15b7218de..ec2ef0969aad4b576bb5aeb6e77bfc89fcc21f1f 100644 --- a/deploy/asapo_services/scripts/receiver.json.tpl +++ b/deploy/asapo_services/scripts/receiver.json.tpl @@ -10,7 +10,7 @@ "AdvertiseURI": "{{ if or (env "meta.ib_address") "none" | regexMatch "none" }}{{ env "NOMAD_IP_recv" }}{{ else }}{{ env "meta.ib_address" }}{{ end }}:{{ env "NOMAD_PORT_recv_ds" }}", "NThreads": {{ env "NOMAD_META_receiver_dataserver_nthreads" }}, "ListenPort": {{ env "NOMAD_PORT_recv_ds" }}, - "NetworkMode": ["tcp"] + "NetworkMode": ["{{ env "NOMAD_META_receiver_network_modes" | split "," | join "\",\"" }}"] }, "DataCache": { "Use": true, diff --git a/deploy/asapo_services/scripts/templates.tf b/deploy/asapo_services/scripts/templates.tf index 50b860111241aa7d6d587e74d9b62b16575adb92..90f6b71ec4bc3f5cb5c6e66e376e57783aaa50a3 100644 --- a/deploy/asapo_services/scripts/templates.tf +++ b/deploy/asapo_services/scripts/templates.tf @@ -44,6 +44,7 @@ data "template_file" "asapo_receivers" { receiver_dataserver_cache_size = "${var.receiver_dataserver_cache_size}" receiver_receive_to_disk_threshold= "${var.receiver_receive_to_disk_threshold}" receiver_dataserver_nthreads = "${var.receiver_dataserver_nthreads}" + receiver_network_modes = "${var.receiver_network_modes}" asapo_user = "${var.asapo_user}" n_receivers = "${var.n_receivers}" } diff --git a/deploy/asapo_services/scripts/vars.tf b/deploy/asapo_services/scripts/vars.tf index 77412ea3f8b16c2e300b6b5266c2c9224a77d5e8..5d69d474089cd565a3bb5650c78a18e6a21eaef7 100644 --- a/deploy/asapo_services/scripts/vars.tf +++ b/deploy/asapo_services/scripts/vars.tf @@ -37,6 +37,9 @@ variable "receiver_dataserver_nthreads" {} variable "receiver_receive_to_disk_threshold" {} +variable "receiver_network_modes" {} + + variable "grafana_total_memory_size" {} variable "influxdb_total_memory_size" {} diff --git a/deploy/nomad_consul_docker/Dockerfile b/deploy/nomad_consul_docker/Dockerfile index 0be21dc22bc696791b948e3dd4fea6f25d2c818a..380b77c5adcd4807f5bbacd937bb813de9bff134 100644 --- a/deploy/nomad_consul_docker/Dockerfile +++ b/deploy/nomad_consul_docker/Dockerfile @@ -24,9 +24,9 @@ RUN add-apt-repository \ RUN apt-get update && apt-get install -y docker-ce-cli wget unzip iproute2 vim td-agent-bit -ENV CONSUL_VERSION=1.8.0 -ENV NOMAD_VERSION=0.11.3 -ENV TERRAFORM_VERSION=0.12.28 +ENV CONSUL_VERSION=1.8.4 +ENV NOMAD_VERSION=0.12.5 +ENV TERRAFORM_VERSION=0.13.3 ENV HASHICORP_RELEASES=https://releases.hashicorp.com RUN set -eux && \ diff --git a/deploy/nomad_consul_docker/jobs-start b/deploy/nomad_consul_docker/jobs-start index e25c56318748b648f7aecc1f3e5a44888136217c..cb24f6ad9483986177051ae8e8960e0b5d4bc564 100755 --- a/deploy/nomad_consul_docker/jobs-start +++ b/deploy/nomad_consul_docker/jobs-start @@ -4,6 +4,7 @@ if [ ! -f /var/nomad/token ] && [ "${ACL_ENABLED}" = "true" ]; then nomad acl bootstrap > /var/nomad/bootstrap && \ cat /var/nomad/bootstrap | grep Secret | awk '{print $4}' > /var/nomad/token && \ cp /var/nomad/token $NOMAD_ALLOC_DIR/nomad_token + cat /var/nomad/token fi if [ -f /var/run/asapo/user_vars.tfvars ]; then diff --git a/receiver/CMakeLists.txt b/receiver/CMakeLists.txt index f77b3213b2d76590a8b277a5744f0ba690ca02d6..9e33088349d0df74ace1396226b7b21ac971931c 100644 --- a/receiver/CMakeLists.txt +++ b/receiver/CMakeLists.txt @@ -69,6 +69,7 @@ set_target_properties(${TARGET_NAME}-bin PROPERTIES RUNTIME_OUTPUT_DIRECTORY ) configure_file(docker/Dockerfile . COPYONLY) +configure_file(docker/install_libfabric.sh . COPYONLY) ################################ diff --git a/receiver/docker/Dockerfile b/receiver/docker/Dockerfile index ad8b5c2634f7f6d9dae2f72e33587457cee68773..e03a3cf5c8fee45ded2c608ddc5d4ab711bd5a36 100644 --- a/receiver/docker/Dockerfile +++ b/receiver/docker/Dockerfile @@ -1,3 +1,6 @@ FROM ubuntu:18.04 ADD receiver / +ADD install_libfabric.sh install_libfabric.sh +RUN apt update && ./install_libfabric.sh + CMD ["/receiver","/var/lib/receiver/config.json"] diff --git a/receiver/docker/install_libfabric.sh b/receiver/docker/install_libfabric.sh new file mode 100755 index 0000000000000000000000000000000000000000..c2842b87d01fdd1bbc83b3b17c6579f2d301b03a --- /dev/null +++ b/receiver/docker/install_libfabric.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash + +apt install -y wget autoconf libtool make librdmacm-dev rdma-core +wget https://github.com/ofiwg/libfabric/archive/v1.11.0.tar.gz +tar xzf v1.11.0.tar.gz +cd libfabric-1.11.0 +./autogen.sh +./configure +make +make install +ldconfig +cd - +rm -rf libfabric-1.11.0 +rm v1.11.0.tar.gz