From 2e2840c71ef52b028fab6f69357500e98697f93d Mon Sep 17 00:00:00 2001 From: Jens Langhammer Date: Mon, 8 Mar 2021 13:02:02 +0100 Subject: [PATCH] helm: add service monitors --- helm/templates/prom-rules.yaml | 121 +++++++++++++++++++++++++++++++++ helm/templates/static-sm.yaml | 17 +++++ helm/templates/web-sm.yaml | 26 +++++++ helm/values.yaml | 3 + 4 files changed, 167 insertions(+) create mode 100644 helm/templates/prom-rules.yaml create mode 100644 helm/templates/static-sm.yaml create mode 100644 helm/templates/web-sm.yaml diff --git a/helm/templates/prom-rules.yaml b/helm/templates/prom-rules.yaml new file mode 100644 index 000000000..847f2a7e7 --- /dev/null +++ b/helm/templates/prom-rules.yaml @@ -0,0 +1,121 @@ +{{- if .Values.monitoring.enabled -}} +--- +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ include "authentik.fullname" . }}-static-rules + labels: + app.kubernetes.io/name: {{ include "authentik.name" . }} + helm.sh/chart: {{ include "authentik.chart" . }} + app.kubernetes.io/instance: {{ .Release.Name }} + app.kubernetes.io/managed-by: {{ .Release.Service }} +spec: + groups: + - name: Aggregate request counters + rules: + - record: job:django_http_requests_before_middlewares_total:sum_rate30s + expr: sum(rate(django_http_requests_before_middlewares_total[30s])) by (job) + - record: job:django_http_requests_unknown_latency_total:sum_rate30s + expr: sum(rate(django_http_requests_unknown_latency_total[30s])) by (job) + - record: job:django_http_ajax_requests_total:sum_rate30s + expr: sum(rate(django_http_ajax_requests_total[30s])) by (job) + - record: job:django_http_responses_before_middlewares_total:sum_rate30s + expr: sum(rate(django_http_responses_before_middlewares_total[30s])) by (job) + - record: job:django_http_requests_unknown_latency_including_middlewares_total:sum_rate30s + expr: sum(rate(django_http_requests_unknown_latency_including_middlewares_total[30s])) by (job) + - record: job:django_http_requests_body_total_bytes:sum_rate30s + expr: sum(rate(django_http_requests_body_total_bytes[30s])) by (job) + - record: job:django_http_responses_streaming_total:sum_rate30s + expr: sum(rate(django_http_responses_streaming_total[30s])) by (job) + - record: job:django_http_responses_body_total_bytes:sum_rate30s + expr: sum(rate(django_http_responses_body_total_bytes[30s])) by (job) + - record: job:django_http_requests_total:sum_rate30s + expr: sum(rate(django_http_requests_total_by_method[30s])) by (job) + - record: job:django_http_requests_total_by_method:sum_rate30s + expr: sum(rate(django_http_requests_total_by_method[30s])) by (job,method) + - record: job:django_http_requests_total_by_transport:sum_rate30s + expr: sum(rate(django_http_requests_total_by_transport[30s])) by (job,transport) + - record: job:django_http_requests_total_by_view:sum_rate30s + expr: sum(rate(django_http_requests_total_by_view_transport_method[30s])) by (job,view) + - record: job:django_http_requests_total_by_view_transport_method:sum_rate30s + expr: sum(rate(django_http_requests_total_by_view_transport_method[30s])) by (job,view,transport,method) + - record: job:django_http_responses_total_by_templatename:sum_rate30s + expr: sum(rate(django_http_responses_total_by_templatename[30s])) by (job,templatename) + - record: job:django_http_responses_total_by_status:sum_rate30s + expr: sum(rate(django_http_responses_total_by_status[30s])) by (job,status) + - record: job:django_http_responses_total_by_status_name_method:sum_rate30s + expr: sum(rate(django_http_responses_total_by_status_name_method[30s])) by (job,status,name,method) + - record: job:django_http_responses_total_by_charset:sum_rate30s + expr: sum(rate(django_http_responses_total_by_charset[30s])) by (job,charset) + - record: job:django_http_exceptions_total_by_type:sum_rate30s + expr: sum(rate(django_http_exceptions_total_by_type[30s])) by (job,type) + - record: job:django_http_exceptions_total_by_view:sum_rate30s + expr: sum(rate(django_http_exceptions_total_by_view[30s])) by (job,view) + - name: Aggregate latency histograms + rules: + - record: job:django_http_requests_latency_including_middlewares_seconds:quantile_rate30s + expr: histogram_quantile(0.50, sum(rate(django_http_requests_latency_including_middlewares_seconds_bucket[30s])) by (job, le)) + labels: + quantile: "50" + - record: job:django_http_requests_latency_including_middlewares_seconds:quantile_rate30s + expr: histogram_quantile(0.95, sum(rate(django_http_requests_latency_including_middlewares_seconds_bucket[30s])) by (job, le)) + labels: + quantile: "95" + - record: job:django_http_requests_latency_including_middlewares_seconds:quantile_rate30s + expr: histogram_quantile(0.99, sum(rate(django_http_requests_latency_including_middlewares_seconds_bucket[30s])) by (job, le)) + labels: + quantile: "99" + - record: job:django_http_requests_latency_including_middlewares_seconds:quantile_rate30s + expr: histogram_quantile(0.999, sum(rate(django_http_requests_latency_including_middlewares_seconds_bucket[30s])) by (job, le)) + labels: + quantile: "99.9" + - record: job:django_http_requests_latency_seconds:quantile_rate30s + expr: histogram_quantile(0.50, sum(rate(django_http_requests_latency_seconds_bucket[30s])) by (job, le)) + labels: + quantile: "50" + - record: job:django_http_requests_latency_seconds:quantile_rate30s + expr: histogram_quantile(0.95, sum(rate(django_http_requests_latency_seconds_bucket[30s])) by (job, le)) + labels: + quantile: "95" + - record: job:django_http_requests_latency_seconds:quantile_rate30s + expr: histogram_quantile(0.99, sum(rate(django_http_requests_latency_seconds_bucket[30s])) by (job, le)) + labels: + quantile: "99" + - record: job:django_http_requests_latency_seconds:quantile_rate30s + expr: histogram_quantile(0.999, sum(rate(django_http_requests_latency_seconds_bucket[30s])) by (job, le)) + labels: + quantile: "99.9" + - name: Aggregate model operations + rules: + - record: job:django_model_inserts_total:sum_rate1m + expr: sum(rate(django_model_inserts_total[1m])) by (job, model) + - record: job:django_model_updates_total:sum_rate1m + expr: sum(rate(django_model_updates_total[1m])) by (job, model) + - record: job:django_model_deletes_total:sum_rate1m + expr: sum(rate(django_model_deletes_total[1m])) by (job, model) + - name: Aggregate database operations + rules: + - record: job:django_db_new_connections_total:sum_rate30s + expr: sum(rate(django_db_new_connections_total[30s])) by (alias, vendor) + - record: job:django_db_new_connection_errors_total:sum_rate30s + expr: sum(rate(django_db_new_connection_errors_total[30s])) by (alias, vendor) + - record: job:django_db_execute_total:sum_rate30s + expr: sum(rate(django_db_execute_total[30s])) by (alias, vendor) + - record: job:django_db_execute_many_total:sum_rate30s + expr: sum(rate(django_db_execute_many_total[30s])) by (alias, vendor) + - record: job:django_db_errors_total:sum_rate30s + expr: sum(rate(django_db_errors_total[30s])) by (alias, vendor, type) + - name: Aggregate migrations + rules: + - record: job:django_migrations_applied_total:max + expr: max(django_migrations_applied_total) by (job, connection) + - record: job:django_migrations_unapplied_total:max + expr: max(django_migrations_unapplied_total) by (job, connection) + - name: Alerts + rules: + - alert: UnappliedMigrations + expr: job:django_migrations_unapplied_total:max > 0 + for: 1m + labels: + severity: testing +{{- end }} diff --git a/helm/templates/static-sm.yaml b/helm/templates/static-sm.yaml new file mode 100644 index 000000000..542e7ba41 --- /dev/null +++ b/helm/templates/static-sm.yaml @@ -0,0 +1,17 @@ +{{- if .Values.monitoring.enabled -}} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + app.kubernetes.io/name: {{ include "authentik.name" . }} + helm.sh/chart: {{ include "authentik.chart" . }} + app.kubernetes.io/instance: {{ .Release.Name }} + app.kubernetes.io/managed-by: {{ .Release.Service }} + name: {{ include "authentik.fullname" . }}-static-monitoring +spec: + endpoints: + - port: http + selector: + matchLabels: + k8s.goauthentik.io/component: static +{{- end }} diff --git a/helm/templates/web-sm.yaml b/helm/templates/web-sm.yaml new file mode 100644 index 000000000..7c09a62fb --- /dev/null +++ b/helm/templates/web-sm.yaml @@ -0,0 +1,26 @@ +{{- if .Values.monitoring.enabled -}} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + app.kubernetes.io/name: {{ include "authentik.name" . }} + helm.sh/chart: {{ include "authentik.chart" . }} + app.kubernetes.io/instance: {{ .Release.Name }} + app.kubernetes.io/managed-by: {{ .Release.Service }} + name: {{ include "authentik.fullname" . }}-web-monitoring +spec: + endpoints: + - basicAuth: + password: + name: {{ include "authentik.fullname" . }}-secret-key + key: SECRET_KEY + username: + name: {{ include "authentik.fullname" . }}-secret-key + key: monitoring_username + port: http + path: /metrics/ + interval: 10s + selector: + matchLabels: + k8s.goauthentik.io/component: web +{{- end }} diff --git a/helm/values.yaml b/helm/values.yaml index 2ba7180ff..22f044b45 100644 --- a/helm/values.yaml +++ b/helm/values.yaml @@ -14,6 +14,9 @@ workerReplicas: 1 # Enable the Kubernetes integration which lets authentik deploy outposts into kubernetes kubernetesIntegration: true +monitoring: + enabled: true + config: # Optionally specify fixed secret_key, otherwise generated automatically # secretKey: _k*@6h2u2@q-dku57hhgzb7tnx*ba9wodcb^s9g0j59@=y(@_o