From 2a03a415b66455cfacbc3b1a749a28ae635b770e Mon Sep 17 00:00:00 2001 From: Tim Schoof <tim.schoof@desy.de> Date: Tue, 11 Feb 2020 12:02:17 +0100 Subject: [PATCH] Improve telegraf health check The health output is configured with the example values from https://github.com/influxdata/telegraf/tree/master/plugins/outputs/health. Because the health check relies on the internal input plugin, Telegraf's internal metrics will now also be written to InfluxDB, but at the current low ingest rate this should not be a problem. --- .../scripts/monitoring.nmd.tpl | 8 +++++--- .../scripts/telegraf.conf.tpl | 16 ++++++++++++++++ 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/deploy/asapo_orchestration_docker/scripts/monitoring.nmd.tpl b/deploy/asapo_orchestration_docker/scripts/monitoring.nmd.tpl index 3ab354c40..e0282a8db 100644 --- a/deploy/asapo_orchestration_docker/scripts/monitoring.nmd.tpl +++ b/deploy/asapo_orchestration_docker/scripts/monitoring.nmd.tpl @@ -159,6 +159,8 @@ job "monitoring" { network { port "telegraf_stream" { } + port "telegraf_health" { + } } } @@ -167,9 +169,9 @@ job "monitoring" { port = "telegraf_stream" check { name = "telegraf-alive" - type = "script" - command = "/bin/pidof" - args = ["telegraf"] + type = "http" + path = "/" + port = "telegraf_health" interval = "10s" timeout = "2s" } diff --git a/deploy/asapo_orchestration_docker/scripts/telegraf.conf.tpl b/deploy/asapo_orchestration_docker/scripts/telegraf.conf.tpl index e962ebe74..13e65266d 100644 --- a/deploy/asapo_orchestration_docker/scripts/telegraf.conf.tpl +++ b/deploy/asapo_orchestration_docker/scripts/telegraf.conf.tpl @@ -9,6 +9,9 @@ [[inputs.consul]] +[[inputs.internal]] + collect_memstats = false + [[outputs.file]] files=["stdout"] @@ -16,3 +19,16 @@ [[outputs.influxdb]] urls = ["http://localhost:{{ env "NOMAD_META_nginx_port" }}/influxdb"] + +[[outputs.health]] + service_address = "http://{{ env "NOMAD_ADDR_telegraf_health" }}" + + namepass = ["internal_write"] + tagpass = { output = ["influxdb"] } + + [[outputs.health.compares]] + field = "buffer_size" + lt = 
5000.0 + + [[outputs.health.contains]] + field = "buffer_size" -- GitLab