[ansible-taler-exchange] branch master updated: fix up alloy deployment
From: gnunet
Subject: [ansible-taler-exchange] branch master updated: fix up alloy deployment
Date: Thu, 30 Jan 2025 12:10:28 +0100
This is an automated email from the git hooks/post-receive script.
grothoff pushed a commit to branch master
in repository ansible-taler-exchange.
The following commit(s) were added to refs/heads/master by this push:
new 7d95abf fix up alloy deployment
7d95abf is described below
commit 7d95abf0d91b57efd3b25fa1df97f9b9cb7a057a
Author: Christian Grothoff <christian@grothoff.org>
AuthorDate: Thu Jan 30 12:10:25 2025 +0100
fix up alloy deployment
---
roles/monitoring/files/etc/default/prometheus | 2 +-
.../monitoring/files/etc/prometheus/prometheus.yml | 5 +
roles/monitoring/templates/etc/alloy/config.alloy | 279 +++++++++++++++------
3 files changed, 210 insertions(+), 76 deletions(-)
diff --git a/roles/monitoring/files/etc/default/prometheus b/roles/monitoring/files/etc/default/prometheus
index ee4b1a8..f4403f6 100644
--- a/roles/monitoring/files/etc/default/prometheus
+++ b/roles/monitoring/files/etc/default/prometheus
@@ -2,4 +2,4 @@
# Due to shell escaping, to pass backslashes for regexes, you need to double
# them (\\d for \d). If running under systemd, you need to double them again
# (\\\\d to mean \d), and escape newlines too.
-ARGS="--web.listen-address=127.0.0.1:9090"
+ARGS="--web.listen-address=127.0.0.1:9090 --enable-feature=remote-write-receiver"
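
For context: --enable-feature=remote-write-receiver makes this Prometheus
instance accept samples pushed to its /api/v1/write endpoint. A minimal
sketch of what the pushing side could look like in Alloy (not part of this
commit; the component label "local" is illustrative, the addresses are the
defaults used in this deployment):

// Hypothetical: push collected samples into the remote-write
// receiver enabled by the Prometheus flag above.
prometheus.remote_write "local" {
  endpoint {
    url = "http://127.0.0.1:9090/api/v1/write"
  }
}
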
diff --git a/roles/monitoring/files/etc/prometheus/prometheus.yml b/roles/monitoring/files/etc/prometheus/prometheus.yml
index 33a29e4..e088a4f 100644
--- a/roles/monitoring/files/etc/prometheus/prometheus.yml
+++ b/roles/monitoring/files/etc/prometheus/prometheus.yml
@@ -49,3 +49,8 @@ scrape_configs:
- job_name: 'process_exporter'
static_configs:
- targets: ['localhost:9256']
+
+  # Job for Alloy
+ - job_name: 'alloy_exporter'
+ static_configs:
+ - targets: ['localhost:12345']
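
For context: localhost:12345 is Alloy's default HTTP listen address, and its
/metrics endpoint also serves the custom taler_requests_* and system_logs_*
series registered by the stage.metrics blocks in config.alloy below. The
commit additionally removes an Alloy-side prometheus.scrape "default" block
whose forward_to was left empty; a hedged sketch of how such a self-scrape
could instead push into the remote-write receiver sketched above (component
labels again illustrative):

// Hypothetical push-based alternative to the scrape job above:
// Alloy scrapes its own /metrics endpoint and forwards the samples
// to the prometheus.remote_write component sketched earlier.
prometheus.scrape "self" {
  targets = [{
    job         = "alloy",
    __address__ = "127.0.0.1:12345",
  }]
  forward_to = [prometheus.remote_write.local.receiver]
}
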
diff --git a/roles/monitoring/templates/etc/alloy/config.alloy b/roles/monitoring/templates/etc/alloy/config.alloy
index 96fae1a..cf7d9e4 100644
--- a/roles/monitoring/templates/etc/alloy/config.alloy
+++ b/roles/monitoring/templates/etc/alloy/config.alloy
@@ -1,44 +1,101 @@
-// Sample config for Alloy.
-//
// For a full configuration reference, see https://grafana.com/docs/alloy
logging {
level = "warn"
}
-// Which log files to monitor
+// Push the logs to loki
+// See: https://grafana.com/docs/alloy/latest/tutorials/send-logs-to-loki/
+loki.write "grafana_loki" {
+ endpoint {
+ url = "https://loki.taler-systems.com/loki/api/v1/push"
+ authorization {
+ type = "Bearer"
+ credentials = "{{ LOKI_ACCESS_TOKEN }}"
+ }
+ }
+}
+
+
+// Which log files to monitor: all regular log files with errors
local.file_match "local_files" {
path_targets = [
{"__path__" = "/var/log/*.log"},
- {"__path__" = "/var/log/nginx/*.err"},
]
sync_period = "5s"
}
-// Which log files to monitor
-local.file_match "http_logs" {
+
+// Connect local_files as source to filter_generic_logs
+// See: https://grafana.com/docs/alloy/latest/tutorials/send-logs-to-loki/
+loki.source.file "log_scrape" {
+ targets = local.file_match.local_files.targets
+ forward_to = [loki.process.filter_generic_logs.receiver]
+ tail_from_end = true
+}
+
+// Which log files to monitor: nginx error logs
+local.file_match "nginx_errors" {
path_targets = [
- {"__path__" = "/var/log/nginx/*.log"},
+ {"__path__" = "/var/log/nginx/*.err"},
]
sync_period = "5s"
}
-// Connect local_files as source to filter_logs
+// Connect nginx_errors directly to loki
// See: https://grafana.com/docs/alloy/latest/tutorials/send-logs-to-loki/
-loki.source.file "log_scrape" {
- targets = local.file_match.local_files.targets
- forward_to = [loki.process.filter_logs.receiver]
+loki.source.file "nginx_error_scrape" {
+ targets = local.file_match.nginx_errors.targets
+ forward_to = [loki.write.grafana_loki.receiver]
tail_from_end = true
}
-loki.source.file "web_scrape" {
+
+// Which log files to monitor: nginx regular logs
+local.file_match "http_logs" {
+ path_targets = [
+ {"__path__" = "/var/log/nginx/*.log"},
+ ]
+ sync_period = "5s"
+}
+
+// Connect http_files as source to filter_http
+// See: https://grafana.com/docs/alloy/latest/tutorials/send-logs-to-loki/
+loki.source.file "http_scrape" {
targets = local.file_match.http_logs.targets
- forward_to = [loki.process.filter_logs.receiver]
+ forward_to = [loki.process.filter_http.receiver]
tail_from_end = true
}
-// Filter the logs
+// Filter the HTTP logs
// See: https://grafana.com/docs/alloy/latest/tutorials/send-logs-to-loki/
-loki.process "filter_logs" {
+loki.process "filter_http" {
+
+  // https://grafana.com/docs/alloy/latest/reference/components/loki/loki.process/#stageregex-block
+ stage.regex {
+    expression = "(?P<ip>\\S+) (?P<identd>\\S+) (?P<user>\\S+) \\[(?P<timestamp>[\\w:\\/]+\\s[+\\\\-]\\d{4})\\] \"(?P<action>\\S+)\\s?(?P<path>\\S+)\\s?(?P<protocol>\\S+)?\" (?P<status>\\d{3}|-) (?P<size>\\d+|-)\\s?\"?(?P<referrer>[^\\\"]*)\"?\\s?\"?(?P<useragent>[^\\\"]*)?\"?"
+ }
+
+ // exported via http://localhost:12345/metrics to Prometheus
+ stage.metrics {
+ metric.histogram {
+ name = "http_status_codes"
+ prefix = "taler_requests_"
+ description = "HTTP status codes, reported from Nginx (all requests)"
+ source = "status"
+ max_idle_duration = "24h"
+      buckets = [100,199,200,201,202,203,299,300,399,400,401,402,403,404,405,406,407,408,409,410,411,418,419,420,450,451,452,499,500,599]
+ }
+
+    // https://grafana.com/docs/alloy/latest/reference/components/loki/loki.process/#metriccounter-block
+ metric.counter {
+ name = "total_requests"
+ prefix = "taler_requests_"
+ description = "Total Requests"
+ match_all = true
+ action = "inc"
+ }
+ }
+
stage.drop {
source = "http_logs"
expression = ".*GET.* 200 .*"
@@ -47,64 +104,40 @@ loki.process "filter_logs" {
forward_to = [loki.write.grafana_loki.receiver]
}
-// Push the logs to loki
-// See: https://grafana.com/docs/alloy/latest/tutorials/send-logs-to-loki/
-loki.write "grafana_loki" {
- endpoint {
- url = "https://loki.taler-systems.com/loki/api/v1/push"
- authorization {
- type = "Bearer"
- credentials = "{{ LOKI_ACCESS_TOKEN }}"
- }
- }
-}
-
-// This was in the defaults, FIXME: not sure what it does...
-prometheus.exporter.unix "default" {
- include_exporter_metrics = true
- disable_collectors = ["mdadm"]
-}
-// This was in the defaults, FIXME: not sure what it does...
-prometheus.scrape "default" {
- targets = array.concat(
- prometheus.exporter.unix.default.targets,
- [{
- // Self-collect metrics
- job = "alloy",
- __address__ = "127.0.0.1:12345",
- }],
- )
-
- forward_to = [
- // TODO: components to forward metrics to (like prometheus.remote_write or
- // prometheus.relabel).
- ]
-}
-
-
-loki.source.file "nginx_taler_performance_logs" {
- targets = [{
+// Monitor the logs with the latency statistics
+local.file_match "nginx_taler_performance_logs" {
+ path_targets = [{
__path__ = "/var/log/nginx/*.tal",
job = "nginx/performance",
}]
- forward_to = [loki.process.perf_logs.receiver]
+ sync_period = "5s"
}
-# https://grafana.com/docs/alloy/latest/reference/components/loki/loki.process/
+// Connect nginx_taler_performance_logs as source to perf_logs
+// See: https://grafana.com/docs/alloy/latest/tutorials/send-logs-to-loki/
+loki.source.file "perf_scrape" {
+ targets = local.file_match.nginx_taler_performance_logs.targets
+ forward_to = [loki.process.perf_logs.receiver]
+ tail_from_end = true
+}
+
+
+
+// Here we export the *.tal logs with the Nginx latency data.
+// https://grafana.com/docs/alloy/latest/reference/components/loki/loki.process/
loki.process "perf_logs" {
-# https://grafana.com/docs/alloy/latest/reference/components/loki/loki.process/#stageregex-block
+  // https://grafana.com/docs/alloy/latest/reference/components/loki/loki.process/#stageregex-block
stage.regex {
    expression = "uri=/(?P<ep>[a-zA-Z]+)(?:/\\w+)?(?:/(?P<act>[a-zA-Z-]+))? s=(?P<status>\\d{3}).*urt=(?P<urt>\\d+\\.\\d+|-) rt=(?P<response_time>\\d+\\.\\d+) rl=(?P<request_length>\\d+) bs=(?P<bytes_sent>\\d+)"
}
-#
-# https://grafana.com/docs/alloy/latest/reference/components/loki/loki.process/#stagetemplate-block
+  // https://grafana.com/docs/alloy/latest/reference/components/loki/loki.process/#stagetemplate-block
stage.template {
source = "endpoint"
-    template = "{{ '{{' }} printf \"%s-%s\" .ep .act | trimSuffix "-" {{ '}}' }}"
+    template = "{{ '{{' }} printf \"%s-%s\" .ep .act | trimSuffix \"-\" {{ '}}' }}"
}
stage.template {
@@ -112,30 +145,23 @@ loki.process "perf_logs" {
template = "{{ '{{' }} .urt | replace \"-\" \"0\" {{ '}}' }}"
}
+ // exported via http://localhost:12345/metrics to Prometheus
stage.metrics {
-# https://grafana.com/docs/alloy/latest/reference/components/loki/loki.process/#metriccounter-block
- metric.counter {
- name = "total_requests"
- prefix = "taler_requests_"
- description = "Total Requests"
- match_all = true
- action = "inc"
- }
+    // https://grafana.com/docs/alloy/latest/reference/components/loki/loki.process/#metriccounter-block
metric.gauge {
name = "response_time"
prefix = "taler_requests_"
- description = "Time taken for Nginx to respond"
+ description = "Time taken for Nginx to respond (non-GET requests)"
source = "response_time"
max_idle_duration = "24h"
action = "set"
}
-
-# https://grafana.com/docs/alloy/latest/reference/components/loki/loki.process/#metrichistogram-block
-# https://www.robustperception.io/how-does-a-prometheus-histogram-work
+    // https://grafana.com/docs/alloy/latest/reference/components/loki/loki.process/#metrichistogram-block
+ // https://www.robustperception.io/how-does-a-prometheus-histogram-work
metric.histogram {
name = "request_length_hist"
prefix = "taler_requests_"
- description = "Request Length reported from Nginx"
+ description = "Request Length reported from Nginx (non-GET requests)"
source = "request_length"
max_idle_duration = "24h"
buckets = [1,10,50,100,200,500,1000,2000,5000]
@@ -144,7 +170,7 @@ loki.process "perf_logs" {
metric.histogram {
name = "bytes_sent_hist"
prefix = "taler_requests_"
- description = "Number of bytes sent, reported from Nginx"
+    description = "Number of bytes sent, reported from Nginx (non-GET requests)"
source = "bytes_sent"
max_idle_duration = "24h"
buckets = [1,10,50,100,200,500,1000,2000,5000]
@@ -152,7 +178,7 @@ loki.process "perf_logs" {
metric.histogram {
name = "response_time_hist"
prefix = "taler_requests_"
- description = "Time taken for Nginx to respond"
+ description = "Time taken for Nginx to respond (non-GET requests)"
source = "response_time"
max_idle_duration = "24h"
buckets = [0.001,0.0025,0.005,0.010,0.025,0.050,0.1,0.25,0.5,1,2,5]
@@ -160,12 +186,115 @@ loki.process "perf_logs" {
metric.histogram {
name = "upstream_response_time_hist"
prefix = "taler_requests_"
- description = "Time taken for the Exchange to respond to Nginx"
+    description = "Time taken for the Exchange to respond to Nginx (non-GET requests)"
source = "upstream_response_time"
max_idle_duration = "24h"
buckets = [0.001,0.0025,0.005,0.010,0.025,0.050,0.1,0.25,0.5,1,2,5]
}
+ }
+ // Finally, pass on to Loki
forward_to = [loki.write.grafana_loki.receiver]
-}
\ No newline at end of file
+}
+
+
+// Monitor journald logs
+// Export journald logs to our generic filter
+// but first pass to our generic filter to change labels
+loki.source.journal "read" {
+ forward_to = [loki.process.filter_generic_logs.receiver]
+ relabel_rules = loki.relabel.journal.rules
+ max_age = "12h"
+ labels = {component = "loki.source.journal"}
+}
+
+
+// https://community.grafana.com/t/scrape-journald-log-with-alloy-docker-container/119896
+loki.relabel "journal" {
+ forward_to = []
+ rule {
+ source_labels = ["__journal__systemd_unit"]
+ target_label = "systemd_unit"
+ }
+ rule {
+ source_labels = ["__journal__hostname"]
+ target_label = "systemd_hostname"
+ }
+ rule {
+ source_labels = ["__journal__transport"]
+ target_label = "systemd_transport"
+ }
+}
+
+
+// Generic filter for logs
+// See: https://grafana.com/docs/alloy/latest/tutorials/send-logs-to-loki/
+loki.process "filter_generic_logs" {
+ // Determine log level:
+  // https://community.grafana.com/t/extract-log-level-via-regex-and-set-it-as-a-label/134938/5
+ stage.regex {
+    expression = `(?P<level>(?i)\b(info|debug|error|warn|warning|trace|fatal)\b)`
+ }
+
+ // https://grafana.com/docs/alloy/latest/tutorials/processing-logs/
+ // Drop debug
+ stage.drop {
+ source = "level"
+ value = "debug"
+ drop_counter_reason = "boring debugging data"
+ }
+ // Drop info
+ stage.drop {
+ source = "level"
+ value = "info"
+ drop_counter_reason = "boring info logs"
+ }
+ // Drop trace
+ stage.drop {
+ source = "level"
+ value = "trace"
+    drop_counter_reason = "boring trace logs"
+ }
+
+ stage.metrics {
+    // https://grafana.com/docs/alloy/latest/reference/components/loki/loki.process/#metriccounter-block
+
+    // https://grafana.com/docs/alloy/latest/reference/components/loki/loki.process/#metrichistogram-block
+ // https://www.robustperception.io/how-does-a-prometheus-histogram-work
+ metric.counter {
+ name = "warn_log_level"
+ prefix = "system_logs_"
+ description = "Warnings in system logs"
+ source = "level"
+ value = "warn"
+ action = "inc"
+ }
+ metric.counter {
+ name = "warn_log_level"
+ prefix = "system_logs_"
+ description = "Warnings in system logs"
+ source = "level"
+ value = "warning"
+ action = "inc"
+ }
+ metric.counter {
+ name = "error_log_level"
+ prefix = "system_logs_"
+ description = "Errors in system logs"
+ source = "level"
+ value = "error"
+ action = "inc"
+ }
+ metric.counter {
+ name = "fatal_log_level"
+ prefix = "system_logs_"
+ description = "Fatal reports in system logs"
+ source = "level"
+ value = "fatal"
+ action = "inc"
+ }
+ }
+
+ forward_to = [loki.write.grafana_loki.receiver]
+}
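
The filter_generic_logs pipeline above extracts a level label via regex and
drops debug/info/trace entries before counting warnings, errors and fatals.
A minimal sketch for exercising that logic locally (not part of this commit;
the /tmp/alloy-debug path and all component labels are hypothetical), using
loki.echo to print the surviving entries to stdout instead of shipping them
to Loki:

// Hypothetical debug wiring: tail a scratch log file through the same
// level-extraction and drop logic, then print what survives.
local.file_match "debug_files" {
  path_targets = [
    {"__path__" = "/tmp/alloy-debug/*.log"},
  ]
  sync_period = "5s"
}

loki.source.file "debug_scrape" {
  targets       = local.file_match.debug_files.targets
  forward_to    = [loki.process.debug_filter.receiver]
  tail_from_end = true
}

loki.process "debug_filter" {
  // Same level extraction as in filter_generic_logs above
  stage.regex {
    expression = `(?P<level>(?i)\b(info|debug|error|warn|warning|trace|fatal)\b)`
  }
  stage.drop {
    source = "level"
    value  = "debug"
  }
  forward_to = [loki.echo.debug.receiver]
}

loki.echo "debug" { }
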
--
To stop receiving notification emails like this one, please contact
gnunet@gnunet.org.