diff --git a/deploy/asapo_orchestration_docker/scripts/monitoring.nmd.tpl b/deploy/asapo_orchestration_docker/scripts/monitoring.nmd.tpl index 3ab354c402e8bd834a70413f88e3565148f5decb..e0282a8db0df8e44932a7ab0d56958d2e437e70b 100644 --- a/deploy/asapo_orchestration_docker/scripts/monitoring.nmd.tpl +++ b/deploy/asapo_orchestration_docker/scripts/monitoring.nmd.tpl @@ -159,6 +159,8 @@ job "monitoring" { network { port "telegraf_stream" { } + port "telegraf_health" { + } } } @@ -167,9 +169,9 @@ job "monitoring" { port = "telegraf_stream" check { name = "telegraf-alive" - type = "script" - command = "/bin/pidof" - args = ["telegraf"] + type = "http" + path = "/" + port = "telegraf_health" interval = "10s" timeout = "2s" } diff --git a/deploy/asapo_orchestration_docker/scripts/telegraf.conf.tpl b/deploy/asapo_orchestration_docker/scripts/telegraf.conf.tpl index e962ebe74feb2340fb6317290c359753027b541a..13e65266ddbea056aaae02c6125fb0eddbe68b85 100644 --- a/deploy/asapo_orchestration_docker/scripts/telegraf.conf.tpl +++ b/deploy/asapo_orchestration_docker/scripts/telegraf.conf.tpl @@ -9,6 +9,9 @@ [[inputs.consul]] +[[inputs.internal]] + collect_memstats = false + [[outputs.file]] files=["stdout"] @@ -16,3 +19,16 @@ [[outputs.influxdb]] urls = ["http://localhost:{{ env "NOMAD_META_nginx_port" }}/influxdb"] + +[[outputs.health]] + service_address = "http://{{ env "NOMAD_ADDR_telegraf_health" }}" + + namepass = ["internal_write"] + tagpass = { output = ["influxdb"] } + + [[outputs.health.compares]] + field = "buffer_size" + lt = 5000.0 + + [[outputs.health.contains]] + field = "buffer_size"