diff --git a/README.md b/README.md index 87e3e24f2cd12c666be0fbd9376ae799d5951751..d9c0cfaa49cd53418187572929d9c862cd70d940 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -system-library +system-charts ============ Rancher 2.0 system library charts. diff --git a/charts/rancher-monitoring/latest/.gitignore b/charts/rancher-monitoring/latest/.gitignore deleted file mode 100644 index 5e7d2734cfc60289debf74293817c0a8f572ff32..0000000000000000000000000000000000000000 --- a/charts/rancher-monitoring/latest/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -# Ignore everything in this directory -* -# Except this file -!.gitignore diff --git a/charts/rancher-monitoring/v0.0.1/.helmignore b/charts/rancher-monitoring/v0.0.1/.helmignore new file mode 100644 index 0000000000000000000000000000000000000000..f0c13194444163d1cba5c67d9e79231a62bc8f44 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/.helmignore @@ -0,0 +1,21 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*~ +# Various IDEs +.project +.idea/ +*.tmproj diff --git a/charts/rancher-monitoring/v0.0.1/Chart.yaml b/charts/rancher-monitoring/v0.0.1/Chart.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0f1d78679c62d109a28e76147275b2f43103b374 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/Chart.yaml @@ -0,0 +1,16 @@ +apiVersion: v1 +description: Provides monitoring for Kubernetes which maintaining by Rancher 2. 
+engine: gotpl +maintainers: +- name: thxCode + email: frank@rancher.com +name: rancher-monitoring +sources: +- https://github.com/coreos/prometheus-operator +version: 0.0.1 +appVersion: "0.23.2" +home: https://github.com/coreos/prometheus-operator +keywords: +- operator +- prometheus +icon: https://coreos.com/sites/default/files/inline-images/Overview-prometheus_0.png diff --git a/charts/rancher-monitoring/v0.0.1/README.md b/charts/rancher-monitoring/v0.0.1/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7db2815d5e2116f9609fbfdf7acf346696b2bbb4 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/README.md @@ -0,0 +1,16 @@ +# rancher-monitoring + +Installs [prometheus-operator](https://github.com/coreos/prometheus-operator) to create/configure/manage Prometheus clusters atop Kubernetes. + +> **Tip**: Use this chart only for Rancher Monitoring! + +## Introduction + +This chart bootstraps a [prometheus-operator](https://github.com/coreos/prometheus-operator) deployment on a [Kubernetes](http://kubernetes.io) cluster using the [Helm](https://helm.sh) package manager. + +### Security + +Alertmanager, Node exporter, Kube-state exporter, Grafana and Prometheus in the same [Namespace](https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/) will use the same [ServiceAccount](https://kubernetes.io/docs/reference/access-authn-authz/service-accounts-admin/) as Prometheus, which is named like `prometheus-{{ .Release.Name }}`. The Operator uses a separate one. 
+ +## Prerequisites + - Rancher 2.1+ diff --git a/charts/rancher-monitoring/v0.0.1/charts/alertmanager/Chart.yaml b/charts/rancher-monitoring/v0.0.1/charts/alertmanager/Chart.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7131a3e45976df142e606280a816e189d5c0d22c --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/alertmanager/Chart.yaml @@ -0,0 +1,8 @@ +apiVersion: v1 +description: Creates Alertmanager CRD instance for Kubernetes which maintaining by Rancher 2. +engine: gotpl +maintainers: +- name: thxCode + email: frank@rancher.com +name: alertmanager +version: 0.0.1 diff --git a/charts/rancher-monitoring/v0.0.1/charts/alertmanager/templates/alertmanager.yaml b/charts/rancher-monitoring/v0.0.1/charts/alertmanager/templates/alertmanager.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f428868d739b4ab3fb270ab7ed1aae06da2605bf --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/alertmanager/templates/alertmanager.yaml @@ -0,0 +1,84 @@ +apiVersion: {{ template "operator_api_version" . }} +kind: Alertmanager +metadata: + labels: + app: {{ template "app.name" . }} + chart: {{ template "app.version" . }} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} +{{- if .Values.labels }} +{{ toYaml .Values.labels | indent 4 }} +{{- end }} + name: {{ .Release.Name }} +spec: + podMetadata: + labels: +{{- if .Values.labels }} +{{ toYaml .Values.labels | indent 6 }} +{{- else }} + app: {{ template "app.name" . }} + chart: {{ template "app.version" . 
}} + release: {{ .Release.Name }} +{{- end }} + baseImage: "{{ .Values.image.repository }}" +{{- if .Values.externalUrl }} + externalUrl: "{{ .Values.externalUrl }}" +{{- end }} +{{- if .Values.nodeSelector }} + nodeSelector: +{{ toYaml .Values.nodeSelector | indent 4 }} +{{- end }} + paused: {{ .Values.paused }} + replicas: {{ .Values.replicaCount }} + logLevel: {{ .Values.logLevel }} + resources: +{{ toYaml .Values.resources | indent 4 }} + retention: "{{ .Values.retention }}" +{{- if .Values.routePrefix }} + routePrefix: "{{ .Values.routePrefix }}" +{{- end }} +{{- if .Values.secrets }} + secrets: +{{ toYaml .Values.secrets | indent 4 }} +{{- end }} +{{- if .Values.enabledRBAC }} + serviceAccountName: {{ .Values.serviceAccountName }} +{{- end }} +{{- if or .Values.storageSpec .Values.persistence.enabled }} + storage: + volumeClaimTemplate: + spec: +{{- if .Values.storageSpec }} +{{ toYaml .Values.storageSpec | indent 8 }} +{{- else }} + {{ if and .Values.persistence.storageClass (ne "default" .Values.persistence.storageClass) }} + storageClassName: {{ .Values.persistence.storageClass }} + {{ end }} + accessModes: + - {{ default "ReadWriteOnce" .Values.persistence.accessMode }} + resources: + requests: + storage: {{ .Values.persistence.size | quote }} +{{- end }} +{{- end }} + version: "{{ .Values.image.tag }}" + affinity: + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + topologyKey: kubernetes.io/hostname + labelSelector: + matchLabels: + app: {{ template "app.name" . 
}} + alertmanager: {{ .Release.Name }} +{{- if .Values.tolerations }} + tolerations: +{{ toYaml .Values.tolerations | indent 4 }} +{{- end }} + imagePullSecrets: +{{ toYaml .Values.image.pullSecrets | indent 4 }} +{{- if .Values.sidecarsSpec }} + containers: +{{ toYaml .Values.sidecarsSpec | indent 4 }} +{{- end }} diff --git a/charts/rancher-monitoring/v0.0.1/charts/alertmanager/templates/metrics-service.yaml b/charts/rancher-monitoring/v0.0.1/charts/alertmanager/templates/metrics-service.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6af953b7efb93aef31d9ee481f544567bdee1443 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/alertmanager/templates/metrics-service.yaml @@ -0,0 +1,24 @@ +apiVersion: v1 +kind: Service +metadata: + name: expose-alertmanager-metrics + labels: + app: {{ template "app.name" . }} + chart: {{ template "app.version" . }} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + monitoring.cattle.io: "true" +spec: + type: ClusterIP + selector: +{{- if .Values.labels }} +{{ toYaml .Values.labels | indent 4 }} +{{- else }} + app: {{ template "app.name" . }} + chart: {{ template "app.version" . }} + release: {{ .Release.Name }} +{{- end }} + ports: + - name: http + port: 9093 + targetPort: web \ No newline at end of file diff --git a/charts/rancher-monitoring/v0.0.1/charts/alertmanager/templates/nginx-configmap.yaml b/charts/rancher-monitoring/v0.0.1/charts/alertmanager/templates/nginx-configmap.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bb364c282b56098466496fcfac191abdf4c01a05 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/alertmanager/templates/nginx-configmap.yaml @@ -0,0 +1,50 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ template "app.nginx.fullname" . }} + labels: + app: {{ template "app.name" . }} + chart: {{ template "app.version" . 
}} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + component: nginx +data: + nginx.conf: |- + user nginx; + worker_processes auto; + error_log /dev/null warn; + pid /var/run/nginx.pid; + + events { + worker_connections 1024; + } + + http { + include /etc/nginx/mime.types; + + log_format main '[$time_local - $status] $remote_addr - $remote_user $request ($http_referer)'; + + server { + listen 80; + + access_log off; + + gzip on; + gzip_min_length 1k; + gzip_comp_level 2; + gzip_types text/plain application/javascript application/x-javascript text/css application/xml text/javascript image/jpeg image/gif image/png; + gzip_vary on; + gzip_disable "MSIE [1-6]\."; + + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + + location / { + proxy_pass http://alertmanager-operated:9093/; + } + + } + + } \ No newline at end of file diff --git a/charts/rancher-monitoring/v0.0.1/charts/alertmanager/templates/nginx-deployment.yaml b/charts/rancher-monitoring/v0.0.1/charts/alertmanager/templates/nginx-deployment.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2b124f9bbf2f7df27e6f0ad9e33b3c8bd9ae8b6e --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/alertmanager/templates/nginx-deployment.yaml @@ -0,0 +1,51 @@ +apiVersion: {{ template "deployment_api_version" . }} +kind: Deployment +metadata: + name: {{ template "app.nginx.fullname" . }} + labels: + app: {{ template "app.name" . }} + chart: {{ template "app.version" . }} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + component: nginx +spec: + replicas: 1 + selector: + matchLabels: + app: {{ template "app.name" . }} + chart: {{ template "app.version" . }} + release: {{ .Release.Name }} + component: nginx + template: + metadata: + labels: + app: {{ template "app.name" . }} + chart: {{ template "app.version" . 
}} + release: {{ .Release.Name }} + component: nginx + spec: + containers: + - name: nginx + image: nginx:1.15.2 + args: + - nginx + - -g + - daemon off; + - -c + - /nginx/nginx.conf + volumeMounts: + - mountPath: /nginx/ + name: alertmanager-nginx + ports: + - name: http + containerPort: 80 + protocol: TCP + volumes: + - name: alertmanager-nginx + configMap: + defaultMode: 438 + items: + - key: nginx.conf + mode: 438 + path: nginx.conf + name: {{ template "app.nginx.fullname" . }} \ No newline at end of file diff --git a/charts/rancher-monitoring/v0.0.1/charts/alertmanager/templates/secret.yaml b/charts/rancher-monitoring/v0.0.1/charts/alertmanager/templates/secret.yaml new file mode 100644 index 0000000000000000000000000000000000000000..173aba82cf8f98967e20f3e12964b56a883ecab0 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/alertmanager/templates/secret.yaml @@ -0,0 +1,16 @@ +{{- if not .Values.configFromSecret }} +apiVersion: v1 +kind: Secret +metadata: + labels: + app: {{ template "app.name" . }} + chart: {{ template "app.version" . }} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + name: {{ template "app.fullname" . }} +data: + alertmanager.yaml: {{ toYaml .Values.config | b64enc | quote }} +{{- range $key, $val := .Values.templates }} + {{ $key }}: {{ $val | b64enc | quote }} +{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/v0.0.1/charts/alertmanager/templates/service.yaml b/charts/rancher-monitoring/v0.0.1/charts/alertmanager/templates/service.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a792737f607f38f208bbcca486715a8c272d1d34 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/alertmanager/templates/service.yaml @@ -0,0 +1,21 @@ +apiVersion: v1 +kind: Service +metadata: + name: access-alertmanager + labels: + app: {{ template "app.name" . }} + chart: {{ template "app.version" . 
}} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + kubernetes.io/cluster-service: "true" +spec: + type: ClusterIP + selector: + app: {{ template "app.name" . }} + chart: {{ template "app.version" . }} + release: {{ .Release.Name }} + component: nginx + ports: + - name: http + port: 80 + targetPort: http diff --git a/charts/rancher-monitoring/v0.0.1/charts/alertmanager/templates/servicemonitor.yaml b/charts/rancher-monitoring/v0.0.1/charts/alertmanager/templates/servicemonitor.yaml new file mode 100644 index 0000000000000000000000000000000000000000..204187772a834a96b22fc4d7fc265b3ee526ab23 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/alertmanager/templates/servicemonitor.yaml @@ -0,0 +1,27 @@ +apiVersion: {{ template "operator_api_version" . }} +kind: ServiceMonitor +metadata: + labels: + app: {{ template "app.name" . }} + chart: {{ template "app.version" . }} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + source: rancher-monitoring + {{- if .Values.serviceMonitor.labels }} +{{ toYaml .Values.serviceMonitor.labels | indent 4 }} + {{- end }} + name: {{ template "app.fullname" . }} +spec: + jobLabel: alertmanager + selector: + matchLabels: + app: {{ template "app.name" . }} + chart: {{ template "app.version" . 
}} + release: {{ .Release.Name }} + monitoring.cattle.io: "true" + namespaceSelector: + matchNames: + - {{ .Release.Namespace | quote }} + endpoints: + - port: http + interval: 30s diff --git a/charts/rancher-monitoring/v0.0.1/charts/alertmanager/values.yaml b/charts/rancher-monitoring/v0.0.1/charts/alertmanager/values.yaml new file mode 100755 index 0000000000000000000000000000000000000000..c930f696eff4e53098d6d7ebee5e03a03b836657 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/alertmanager/values.yaml @@ -0,0 +1,185 @@ +enabledRBAC: true + +## Name of an existing ServiceAccount +## +serviceAccountName: "" + +## CRD apiGroup +## +apiGroup: "monitoring.coreos.com" + +## The name of a secret in the same Kubernetes namespace which contains the Alertmanager config +## If defined this will be used instead of the `config` block values. +## The name of the secret must be alertmanager-{{ .Release.Name }} and its data must contain, at least, a key called `alertmanager.yaml` +## that contains the configuration as value. +## +configFromSecret: "" + +## Alertmanager configuration directives +## Ref: https://prometheus.io/docs/alerting/configuration/ +## +config: {} +# +# An example config: +# global: +# resolve_timeout: 5m +# route: +# group_by: ['job'] +# group_wait: 30s +# group_interval: 5m +# repeat_interval: 12h +# receiver: 'null' +# routes: +# - match: +# alertname: DeadMansSwitch +# receiver: 'null' +# receivers: +# - name: 'null' + +## Alertmanager template files to include +# +templates: {} +# +# An example template: +# template_1.tmpl: |- +# {{ define "cluster" }}{{ .ExternalURL | reReplaceAll ".*alertmanager\\.(.*)" "$1" }}{{ end }} +# +# {{ define "slack.myorg.text" }} +# {{- $root := . 
-}} +# {{ range .Alerts }} +# *Alert:* {{ .Annotations.summary }} - `{{ .Labels.severity }}` +# *Cluster:* {{ template "cluster" $root }} +# *Description:* {{ .Annotations.description }} +# *Graph:* <{{ .GeneratorURL }}|:chart_with_upwards_trend:> +# *Runbook:* <{{ .Annotations.runbook }}|:spiral_note_pad:> +# *Details:* +# {{ range .Labels.SortedPairs }} • *{{ .Name }}:* `{{ .Value }}` +# {{ end }} + +## External URL at which Alertmanager will be reachable +## +externalUrl: "" + +serviceMonitor: + ## Custom Labels to be added to ServiceMonitor + ## + labels: {} + +## Alertmanager container image +## +image: + repository: quay.io/prometheus/alertmanager + tag: v0.15.2 + +## Labels to be added to the Alertmanager +## +# labels: {} + +## Node labels for Alertmanager pod assignment +## Ref: https://kubernetes.io/docs/user-guide/node-selection/ +## +nodeSelector: {} + +## Tolerations for use with node taints +## Ref: https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/ +## +tolerations: {} + # - key: "key" + # operator: "Equal" + # value: "value" + # effect: "NoSchedule" + +## If true, the Operator won't process any Alertmanager configuration changes +## +paused: false + +## Number of Alertmanager replicas desired +## +replicaCount: 1 + +## Resource limits & requests +## Ref: https://kubernetes.io/docs/user-guide/compute-resources/ +## +resources: {} + # requests: + # memory: 400Mi + +## How long to retain metrics +## +retention: 24h + +## Prefix used to register routes, overriding externalUrl route. +## Useful for proxies that rewrite URLs. +## +routePrefix: "" + +## List of Secrets in the same namespace as the Alertmanager +## object, which shall be mounted into the Alertmanager Pods. +## Ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#prometheusspec +## +secrets: [] + +service: + ## Maintains session affinity. Should be set to ClientIP for HA setup + ## Only options are ClientIP and None. Do not leave blank. 
+ sessionAffinity: None + + ## Annotations to be added to the Service + ## + annotations: {} + + ## Cluster-internal IP address for Alertmanager Service + ## + clusterIP: "" + + ## List of external IP addresses at which the Alertmanager Service will be available + ## + externalIPs: [] + + ## Labels to be added to the Service + ## + labels: {} + + ## External IP address to assign to Alertmanager Service + ## Only used if service.type is 'LoadBalancer' and supported by cloud provider + ## + loadBalancerIP: "" + + ## List of client IPs allowed to access Alertmanager Service + ## Only used if service.type is 'LoadBalancer' and supported by cloud provider + ## + loadBalancerSourceRanges: [] + + ## Port to expose on each node + ## Only used if service.type is 'NodePort' + ## + # nodePort: 30903 + + ## Service type + ## + type: ClusterIP + +logLevel: "info" + +## Alertmanager StorageSpec for persistent data +## Ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/user-guides/storage.md +## +storageSpec: {} +# storageClassName: gluster +# accessModes: ["ReadWriteOnce"] +# resources: +# requests: +# storage: 50Gi +# selector: {} + +## Easy way to create persistent data +## +persistence: {} +# enabled: true +# storageClass: gluster +# accessMode: "ReadWriteOnce" +# size: 50Gi + +sidecarsSpec: [] +# - name: sidecar +# image: registry/name:tag diff --git a/charts/rancher-monitoring/v0.0.1/charts/exporter-coredns/Chart.yaml b/charts/rancher-monitoring/v0.0.1/charts/exporter-coredns/Chart.yaml new file mode 100755 index 0000000000000000000000000000000000000000..7490890e5182c9288a6db26850146ec844dc9ede --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/exporter-coredns/Chart.yaml @@ -0,0 +1,9 @@ +apiVersion: v1 +description: Creates ServiceMonitor CRD of coredns for Kubernetes which maintaining by Rancher 2. 
+engine: gotpl +maintainers: +- name: thxCode + email: frank@rancher.com +name: exporter-coredns +version: 0.0.1 + diff --git a/charts/rancher-monitoring/v0.0.1/charts/exporter-coredns/templates/endpoints.yaml b/charts/rancher-monitoring/v0.0.1/charts/exporter-coredns/templates/endpoints.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2b8c81a5493bd35a4f80fa35ecd47c898040e67d --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/exporter-coredns/templates/endpoints.yaml @@ -0,0 +1,41 @@ +{{- if .Values.endpoints }} +apiVersion: v1 +kind: Service +metadata: + labels: + app: {{ template "app.name" . }} + chart: {{ template "app.version" . }} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + k8s-app: coredns + name: {{ template "app.dnsname" . }} +spec: + type: ClusterIP + clusterIP: None + ports: + - name: metrics + port: {{ .Values.ports.metrics.port }} + protocol: TCP + targetPort: {{ .Values.ports.metrics.port }} +--- + +apiVersion: v1 +kind: Endpoints +metadata: + labels: + app: {{ template "app.name" . }} + chart: {{ template "app.version" . }} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + name: {{ template "app.dnsname" . }} +subsets: + - addresses: + {{- range .Values.endpoints }} + - ip: {{ . }} + {{- end }} + ports: + - name: metrics + port: {{ .Values.ports.metrics.port }} + protocol: TCP +{{- end }} + diff --git a/charts/rancher-monitoring/v0.0.1/charts/exporter-coredns/templates/servicemonitor.yaml b/charts/rancher-monitoring/v0.0.1/charts/exporter-coredns/templates/servicemonitor.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3308358b63819bbb967e01c0f0832a5221149735 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/exporter-coredns/templates/servicemonitor.yaml @@ -0,0 +1,27 @@ +apiVersion: {{ template "operator_api_version" . }} +kind: ServiceMonitor +metadata: + labels: + app: {{ template "app.name" . }} + chart: {{ template "app.version" . 
}} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + source: rancher-monitoring + {{- if .Values.serviceMonitor.labels }} +{{ toYaml .Values.serviceMonitor.labels | indent 4 }} + {{- end }} + name: {{ template "app.fullname" . }} +spec: + jobLabel: coredns + selector: + matchLabels: + k8s-app: coredns + namespaceSelector: + any: true + matchNames: + - "kube-system" + - {{ .Release.Namespace | quote }} + endpoints: + - port: metrics + interval: 15s + bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token diff --git a/charts/rancher-monitoring/v0.0.1/charts/exporter-coredns/values.yaml b/charts/rancher-monitoring/v0.0.1/charts/exporter-coredns/values.yaml new file mode 100755 index 0000000000000000000000000000000000000000..ad12f9b743778019a3dfdc4b20877b1b8986cbd9 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/exporter-coredns/values.yaml @@ -0,0 +1,16 @@ +## CRD apiGroup +## +apiGroup: "monitoring.coreos.com" + +## Custom endpoints +## +endpoints: [] + +ports: + metrics: + port: 9153 + +serviceMonitor: + ## Custom Labels to be added to ServiceMonitor + ## + labels: {} diff --git a/charts/rancher-monitoring/v0.0.1/charts/exporter-fluentd/Chart.yaml b/charts/rancher-monitoring/v0.0.1/charts/exporter-fluentd/Chart.yaml new file mode 100644 index 0000000000000000000000000000000000000000..18405cf631c48081fbc574a9deb951f177caa9c2 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/exporter-fluentd/Chart.yaml @@ -0,0 +1,8 @@ +apiVersion: v1 +description: Creates Fluentd Metrics Exporter instance for Kubernetes which maintaining by Rancher 2. 
+engine: gotpl +maintainers: +- name: aiwantaozi + email: michelia.feng@gmail.com +name: exporter-fluentd +version: 0.0.1 diff --git a/charts/rancher-monitoring/v0.0.1/charts/exporter-fluentd/templates/servicemonitor.yaml b/charts/rancher-monitoring/v0.0.1/charts/exporter-fluentd/templates/servicemonitor.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5db46066ee5c823e510fb6fb69e6578fade45f08 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/exporter-fluentd/templates/servicemonitor.yaml @@ -0,0 +1,25 @@ +apiVersion: {{ template "operator_api_version" . }} +kind: ServiceMonitor +metadata: + labels: + app: {{ template "app.name" . }} + chart: {{ template "app.version" . }} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + source: rancher-monitoring + {{- if .Values.serviceMonitor.labels }} +{{ toYaml .Values.serviceMonitor.labels | indent 4 }} + {{- end }} + name: {{ template "app.fullname" . }} +spec: + jobLabel: fluentd + selector: + matchLabels: + k8s-app: fluentd + namespaceSelector: + matchNames: + - cattle-logging + endpoints: + - port: metrics + interval: 15s + honorLabels: true diff --git a/charts/rancher-monitoring/v0.0.1/charts/exporter-fluentd/values.yaml b/charts/rancher-monitoring/v0.0.1/charts/exporter-fluentd/values.yaml new file mode 100755 index 0000000000000000000000000000000000000000..05b1cf73cc818044c15c30675e808462cbf75cd6 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/exporter-fluentd/values.yaml @@ -0,0 +1,11 @@ +## CRD apiGroup +## +apiGroup: "monitoring.coreos.com" + +## Skip verification until we have resolved why the certificate validation +## for the kubelet on API server nodes fail. 
+## +serviceMonitor: + ## Custom Labels to be added to ServiceMonitor + ## + labels: {} diff --git a/charts/rancher-monitoring/v0.0.1/charts/exporter-kube-controller-manager/Chart.yaml b/charts/rancher-monitoring/v0.0.1/charts/exporter-kube-controller-manager/Chart.yaml new file mode 100755 index 0000000000000000000000000000000000000000..95e49cb8e033448be537739c22d90f5eb6fb58ec --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/exporter-kube-controller-manager/Chart.yaml @@ -0,0 +1,9 @@ +apiVersion: v1 +description: Creates ServiceMonitor CRD of controller manager for Kubernetes which maintaining by Rancher 2. +engine: gotpl +maintainers: +- name: thxCode + email: frank@rancher.com +name: exporter-kube-controller-manager +version: 0.0.1 + diff --git a/charts/rancher-monitoring/v0.0.1/charts/exporter-kube-controller-manager/templates/endpoints.yaml b/charts/rancher-monitoring/v0.0.1/charts/exporter-kube-controller-manager/templates/endpoints.yaml new file mode 100644 index 0000000000000000000000000000000000000000..69386968a5c40677e42fd37c9adf3267b58a78cb --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/exporter-kube-controller-manager/templates/endpoints.yaml @@ -0,0 +1,40 @@ +{{- if .Values.endpoints }} +apiVersion: v1 +kind: Service +metadata: + labels: + app: {{ template "app.name" . }} + chart: {{ template "app.version" . }} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + k8s-app: kube-controller-manager + name: {{ template "app.dnsname" . }} +spec: + type: ClusterIP + clusterIP: None + ports: + - name: metrics + port: {{ .Values.ports.metrics.port }} + protocol: TCP + targetPort: {{ .Values.ports.metrics.port }} +--- + +apiVersion: v1 +kind: Endpoints +metadata: + labels: + app: {{ template "app.name" . }} + chart: {{ template "app.version" . }} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + name: {{ template "app.dnsname" . }} +subsets: + - addresses: + {{- range .Values.endpoints }} + - ip: {{ . 
}} + {{- end }} + ports: + - name: metrics + port: {{ .Values.ports.metrics.port }} + protocol: TCP +{{- end }} diff --git a/charts/rancher-monitoring/v0.0.1/charts/exporter-kube-controller-manager/templates/servicemonitor.yaml b/charts/rancher-monitoring/v0.0.1/charts/exporter-kube-controller-manager/templates/servicemonitor.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cef9a78f2617244461109ba6fd281eed66cd8232 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/exporter-kube-controller-manager/templates/servicemonitor.yaml @@ -0,0 +1,32 @@ +apiVersion: {{ template "operator_api_version" . }} +kind: ServiceMonitor +metadata: + labels: + app: {{ template "app.name" . }} + chart: {{ template "app.version" . }} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + source: rancher-monitoring + {{- if .Values.serviceMonitor.labels }} +{{ toYaml .Values.serviceMonitor.labels | indent 4 }} + {{- end }} + name: {{ template "app.fullname" . }} +spec: + jobLabel: kube-controller-manager + selector: + matchLabels: + k8s-app: kube-controller-manager + namespaceSelector: + any: true + matchNames: + - "kube-system" + - {{ .Release.Namespace | quote }} + endpoints: + - port: metrics + interval: 15s + bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + tlsConfig: + caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + {{- if .Values.insecureSkipVerify }} + insecureSkipVerify: true + {{- end }} diff --git a/charts/rancher-monitoring/v0.0.1/charts/exporter-kube-controller-manager/values.yaml b/charts/rancher-monitoring/v0.0.1/charts/exporter-kube-controller-manager/values.yaml new file mode 100755 index 0000000000000000000000000000000000000000..0a35f3c623c441b2f4cab61c58243e94fb9b7108 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/exporter-kube-controller-manager/values.yaml @@ -0,0 +1,21 @@ +## CRD apiGroup +## +apiGroup: "monitoring.coreos.com" + +## Skip verification until we have resolved 
why the certificate validation +## for the kubelet on API server nodes fail. +## +insecureSkipVerify: true + +## Custom endpoints +## +endpoints: [] + +ports: + metrics: + port: 10252 + +serviceMonitor: + ## Custom Labels to be added to ServiceMonitor + ## + labels: {} diff --git a/charts/rancher-monitoring/v0.0.1/charts/exporter-kube-dns/Chart.yaml b/charts/rancher-monitoring/v0.0.1/charts/exporter-kube-dns/Chart.yaml new file mode 100755 index 0000000000000000000000000000000000000000..27270fdcf9db04145f40e63d377936e512146279 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/exporter-kube-dns/Chart.yaml @@ -0,0 +1,9 @@ +apiVersion: v1 +description: Creates ServiceMonitor CRD of kube-dns for Kubernetes which maintaining by Rancher 2. +engine: gotpl +maintainers: +- name: thxCode + email: frank@rancher.com +name: exporter-kube-dns +version: 0.0.1 + diff --git a/charts/rancher-monitoring/v0.0.1/charts/exporter-kube-dns/templates/endpoints.yaml b/charts/rancher-monitoring/v0.0.1/charts/exporter-kube-dns/templates/endpoints.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a9d5999e6036ee5db516ebde7632b61426c7be06 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/exporter-kube-dns/templates/endpoints.yaml @@ -0,0 +1,47 @@ +{{- if .Values.endpoints }} +apiVersion: v1 +kind: Service +metadata: + labels: + app: {{ template "app.name" . }} + chart: {{ template "app.version" . }} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + k8s-app: coredns + name: {{ template "app.dnsname" . }} +spec: + type: ClusterIP + clusterIP: None + ports: + - name: dnsmasq-metrics + port: {{ .Values.ports.metrics.dnsmasq.port }} + protocol: TCP + targetPort: {{ .Values.ports.metrics.dnsmasq.port }} + - name: skydns-metrics + port: {{ .Values.ports.metrics.skydns.port }} + protocol: TCP + targetPort: {{ .Values.ports.metrics.skydns.port }} +--- + +apiVersion: v1 +kind: Endpoints +metadata: + labels: + app: {{ template "app.name" . 
}} + chart: {{ template "app.version" . }} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + name: {{ template "app.dnsname" . }} +subsets: + - addresses: + {{- range .Values.endpoints }} + - ip: {{ . }} + {{- end }} + ports: + - name: dnsmasq-metrics + port: {{ .Values.ports.metrics.dnsmasq.port }} + protocol: TCP + - name: skydns-metrics + port: {{ .Values.ports.metrics.skydns.port }} + protocol: TCP +{{- end }} diff --git a/charts/rancher-monitoring/v0.0.1/charts/exporter-kube-dns/templates/servicemonitor.yaml b/charts/rancher-monitoring/v0.0.1/charts/exporter-kube-dns/templates/servicemonitor.yaml new file mode 100644 index 0000000000000000000000000000000000000000..47a6ef7e3570708aab46b2ea8db4e760f9851441 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/exporter-kube-dns/templates/servicemonitor.yaml @@ -0,0 +1,30 @@ +apiVersion: {{ template "operator_api_version" . }} +kind: ServiceMonitor +metadata: + labels: + app: {{ template "app.name" . }} + chart: {{ template "app.version" . }} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + source: rancher-monitoring + {{- if .Values.serviceMonitor.labels }} +{{ toYaml .Values.serviceMonitor.labels | indent 4 }} + {{- end }} + name: {{ template "app.fullname" . 
}} +spec: + jobLabel: kube-dns + selector: + matchLabels: + k8s-app: coredns + namespaceSelector: + any: true + matchNames: + - "kube-system" + - {{ .Release.Namespace | quote }} + endpoints: + - port: dnsmasq-metrics + interval: 15s + bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + - port: skydns-metrics + interval: 15s + bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token diff --git a/charts/rancher-monitoring/v0.0.1/charts/exporter-kube-dns/values.yaml b/charts/rancher-monitoring/v0.0.1/charts/exporter-kube-dns/values.yaml new file mode 100755 index 0000000000000000000000000000000000000000..dcc3e86e360bb12e0fc300e2f60a30febd67257d --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/exporter-kube-dns/values.yaml @@ -0,0 +1,19 @@ +## CRD apiGroup +## +apiGroup: "monitoring.coreos.com" + +## Custom endpoints +## +endpoints: [] + +ports: + metrics: + dnsmasq: + port: 10054 + skydns: + port: 10055 + +serviceMonitor: + ## Custom Labels to be added to ServiceMonitor + ## + labels: {} diff --git a/charts/rancher-monitoring/v0.0.1/charts/exporter-kube-etcd/Chart.yaml b/charts/rancher-monitoring/v0.0.1/charts/exporter-kube-etcd/Chart.yaml new file mode 100755 index 0000000000000000000000000000000000000000..7ea4c443904d79d3130cad558cc3d6279dc9d9d9 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/exporter-kube-etcd/Chart.yaml @@ -0,0 +1,9 @@ +apiVersion: v1 +description: Creates ServiceMonitor CRD of etcd for Kubernetes which maintaining by Rancher 2. 
+engine: gotpl +maintainers: +- name: thxCode + email: frank@rancher.com +name: exporter-kube-etcd +version: 0.0.1 + diff --git a/charts/rancher-monitoring/v0.0.1/charts/exporter-kube-etcd/templates/endpoints.yaml b/charts/rancher-monitoring/v0.0.1/charts/exporter-kube-etcd/templates/endpoints.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c39b2a1894cf3fa8ef90fb1cd9c21823635495c7 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/exporter-kube-etcd/templates/endpoints.yaml @@ -0,0 +1,40 @@ +{{- if .Values.endpoints }} +apiVersion: v1 +kind: Service +metadata: + labels: + app: {{ template "app.name" . }} + chart: {{ template "app.version" . }} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + k8s-app: etcd-server + name: {{ template "app.dnsname" . }} +spec: + type: ClusterIP + clusterIP: None + ports: + - name: metrics + port: {{ .Values.ports.metrics.port }} + protocol: TCP + targetPort: {{ .Values.ports.metrics.port }} +--- + +apiVersion: v1 +kind: Endpoints +metadata: + labels: + app: {{ template "app.name" . }} + chart: {{ template "app.version" . }} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + name: {{ template "app.dnsname" . }} +subsets: + - addresses: + {{- range .Values.endpoints }} + - ip: {{ . }} + {{- end }} + ports: + - name: metrics + port: {{ .Values.ports.metrics.port }} + protocol: TCP +{{- end }} diff --git a/charts/rancher-monitoring/v0.0.1/charts/exporter-kube-etcd/templates/servicemonitor.yaml b/charts/rancher-monitoring/v0.0.1/charts/exporter-kube-etcd/templates/servicemonitor.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4b99ceaa09ad88f1125b197c82816803a5f63c0b --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/exporter-kube-etcd/templates/servicemonitor.yaml @@ -0,0 +1,41 @@ +apiVersion: {{ template "operator_api_version" . }} +kind: ServiceMonitor +metadata: + labels: + app: {{ template "app.name" . 
}} + chart: {{ template "app.version" . }} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + source: rancher-monitoring + {{- if .Values.serviceMonitor.labels }} +{{ toYaml .Values.serviceMonitor.labels | indent 4 }} + {{- end }} + name: {{ template "app.fullname" . }} +spec: + jobLabel: kube-etcd + selector: + matchLabels: + k8s-app: etcd-server + namespaceSelector: + any: true + matchNames: + - "kube-system" + - {{ .Release.Namespace | quote }} + endpoints: + - port: metrics + interval: 15s + bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + {{- if eq .Values.ports.metrics.scheme "https" }} + scheme: https + tlsConfig: + caFile: {{ .Values.caFile }} + {{- if .Values.certFile }} + certFile: {{ .Values.certFile }} + {{- end }} + {{- if .Values.keyFile }} + keyFile: {{ .Values.keyFile }} + {{- end}} + {{- if .Values.insecureSkipVerify }} + insecureSkipVerify: true + {{- end }} + {{- end }} diff --git a/charts/rancher-monitoring/v0.0.1/charts/exporter-kube-etcd/values.yaml b/charts/rancher-monitoring/v0.0.1/charts/exporter-kube-etcd/values.yaml new file mode 100755 index 0000000000000000000000000000000000000000..bd6eaad1772987cf86838eee1baa5670feaa9390 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/exporter-kube-etcd/values.yaml @@ -0,0 +1,28 @@ +## CRD apiGroup +## +apiGroup: "monitoring.coreos.com" + +## Skip verification until we have resolved why the certificate validation +## for the kubelet on API server nodes fail. 
+## +insecureSkipVerify: true + +## TLS configuration for the service monitor: only caFile is set by default; certFile and keyFile are added when provided +## +caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt +certFile: "" +keyFile: "" + +## Custom endpoints +## +endpoints: [] + +ports: + metrics: + scheme: "https" + port: 4001 + +serviceMonitor: + ## Custom Labels to be added to ServiceMonitor + ## + labels: {} diff --git a/charts/rancher-monitoring/v0.0.1/charts/exporter-kube-scheduler/Chart.yaml b/charts/rancher-monitoring/v0.0.1/charts/exporter-kube-scheduler/Chart.yaml new file mode 100755 index 0000000000000000000000000000000000000000..62529895c312a9cf6c9fdd0e0c0bd0c87fca19d7 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/exporter-kube-scheduler/Chart.yaml @@ -0,0 +1,9 @@ +apiVersion: v1 +description: Creates ServiceMonitor CRD of scheduler for Kubernetes which maintaining by Rancher 2. +engine: gotpl +maintainers: +- name: thxCode + email: frank@rancher.com +name: exporter-kube-scheduler +version: 0.0.1 + diff --git a/charts/rancher-monitoring/v0.0.1/charts/exporter-kube-scheduler/templates/endpoints.yaml b/charts/rancher-monitoring/v0.0.1/charts/exporter-kube-scheduler/templates/endpoints.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d9cb524fa03a5077dbc72cf52a350c804383cd1f --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/exporter-kube-scheduler/templates/endpoints.yaml @@ -0,0 +1,40 @@ +{{- if .Values.endpoints }} +apiVersion: v1 +kind: Service +metadata: + labels: + app: {{ template "app.name" . }} + chart: {{ template "app.version" . }} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + k8s-app: kube-scheduler + name: {{ template "app.dnsname" .
}} +spec: + type: ClusterIP + clusterIP: None + ports: + - name: metrics + port: {{ .Values.ports.metrics.port }} + protocol: TCP + targetPort: {{ .Values.ports.metrics.port }} +--- + +apiVersion: v1 +kind: Endpoints +metadata: + labels: + app: {{ template "app.name" . }} + chart: {{ template "app.version" . }} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + name: {{ template "app.dnsname" . }} +subsets: + - addresses: + {{- range .Values.endpoints }} + - ip: {{ . }} + {{- end }} + ports: + - name: metrics + port: {{ .Values.ports.metrics.port }} + protocol: TCP +{{- end }} \ No newline at end of file diff --git a/charts/rancher-monitoring/v0.0.1/charts/exporter-kube-scheduler/templates/prometheusrule.yaml b/charts/rancher-monitoring/v0.0.1/charts/exporter-kube-scheduler/templates/prometheusrule.yaml new file mode 100644 index 0000000000000000000000000000000000000000..aae5a0459b7f0ebbc78a1e719103951902bbe856 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/exporter-kube-scheduler/templates/prometheusrule.yaml @@ -0,0 +1,62 @@ +apiVersion: {{ template "operator_api_version" . }} +kind: PrometheusRule +metadata: + labels: + app: {{ template "app.name" . }} + chart: {{ template "app.version" . }} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + source: rancher-monitoring + {{- if .Values.prometheusRule.labels }} +{{ toYaml .Values.prometheusRule.labels | indent 4 }} + {{- end }} + name: {{ template "app.fullname" . 
}} +spec: + groups: + - name: kube-scheduler.rules + rules: + - record: cluster:scheduler_e2e_scheduling_latency_seconds:quantile + expr: histogram_quantile(0.99, sum(scheduler_e2e_scheduling_latency_microseconds_bucket) + BY (le, cluster)) / 1e+06 + labels: + quantile: "0.99" + - record: cluster:scheduler_e2e_scheduling_latency_seconds:quantile + expr: histogram_quantile(0.9, sum(scheduler_e2e_scheduling_latency_microseconds_bucket) + BY (le, cluster)) / 1e+06 + labels: + quantile: "0.9" + - record: cluster:scheduler_e2e_scheduling_latency_seconds:quantile + expr: histogram_quantile(0.5, sum(scheduler_e2e_scheduling_latency_microseconds_bucket) + BY (le, cluster)) / 1e+06 + labels: + quantile: "0.5" + - record: cluster:scheduler_scheduling_algorithm_latency_seconds:quantile + expr: histogram_quantile(0.99, sum(scheduler_scheduling_algorithm_latency_microseconds_bucket) + BY (le, cluster)) / 1e+06 + labels: + quantile: "0.99" + - record: cluster:scheduler_scheduling_algorithm_latency_seconds:quantile + expr: histogram_quantile(0.9, sum(scheduler_scheduling_algorithm_latency_microseconds_bucket) + BY (le, cluster)) / 1e+06 + labels: + quantile: "0.9" + - record: cluster:scheduler_scheduling_algorithm_latency_seconds:quantile + expr: histogram_quantile(0.5, sum(scheduler_scheduling_algorithm_latency_microseconds_bucket) + BY (le, cluster)) / 1e+06 + labels: + quantile: "0.5" + - record: cluster:scheduler_binding_latency_seconds:quantile + expr: histogram_quantile(0.99, sum(scheduler_binding_latency_microseconds_bucket) + BY (le, cluster)) / 1e+06 + labels: + quantile: "0.99" + - record: cluster:scheduler_binding_latency_seconds:quantile + expr: histogram_quantile(0.9, sum(scheduler_binding_latency_microseconds_bucket) + BY (le, cluster)) / 1e+06 + labels: + quantile: "0.9" + - record: cluster:scheduler_binding_latency_seconds:quantile + expr: histogram_quantile(0.5, sum(scheduler_binding_latency_microseconds_bucket) + BY (le, cluster)) / 1e+06 + labels: + quantile: 
"0.5" diff --git a/charts/rancher-monitoring/v0.0.1/charts/exporter-kube-scheduler/templates/servicemonitor.yaml b/charts/rancher-monitoring/v0.0.1/charts/exporter-kube-scheduler/templates/servicemonitor.yaml new file mode 100644 index 0000000000000000000000000000000000000000..debe27931245103eac751e3b9ea8ba1e4339af7b --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/exporter-kube-scheduler/templates/servicemonitor.yaml @@ -0,0 +1,27 @@ +apiVersion: {{ template "operator_api_version" . }} +kind: ServiceMonitor +metadata: + labels: + app: {{ template "app.name" . }} + chart: {{ template "app.version" . }} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + source: rancher-monitoring + {{- if .Values.serviceMonitor.labels }} +{{ toYaml .Values.serviceMonitor.labels | indent 4 }} + {{- end }} + name: {{ template "app.fullname" . }} +spec: + jobLabel: kube-scheduler + selector: + matchLabels: + k8s-app: kube-scheduler + namespaceSelector: + any: true + matchNames: + - "kube-system" + - {{ .Release.Namespace | quote }} + endpoints: + - port: metrics + interval: 15s + bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token diff --git a/charts/rancher-monitoring/v0.0.1/charts/exporter-kube-scheduler/values.yaml b/charts/rancher-monitoring/v0.0.1/charts/exporter-kube-scheduler/values.yaml new file mode 100755 index 0000000000000000000000000000000000000000..32009873d820a583e3285d25000d7e48297e6029 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/exporter-kube-scheduler/values.yaml @@ -0,0 +1,21 @@ +## CRD apiGroup +## +apiGroup: "monitoring.coreos.com" + +## Custom endpoints +## +endpoints: [] + +ports: + metrics: + port: 10251 + +serviceMonitor: + ## Custom Labels to be added to ServiceMonitor + ## + labels: {} + +prometheusRule: + ## Custom Labels to be added to PrometheusRule + ## + labels: {} diff --git a/charts/rancher-monitoring/v0.0.1/charts/exporter-kube-state/Chart.yaml 
b/charts/rancher-monitoring/v0.0.1/charts/exporter-kube-state/Chart.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7fcf49c0b098f7efa268df0f419212ba92d3b50e --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/exporter-kube-state/Chart.yaml @@ -0,0 +1,8 @@ +apiVersion: v1 +description: Creates Kube-state Exporter instance for Kubernetes which maintaining by Rancher 2. +engine: gotpl +maintainers: +- name: thxCode + email: frank@rancher.com +name: exporter-kube-state +version: 0.0.1 diff --git a/charts/rancher-monitoring/v0.0.1/charts/exporter-kube-state/templates/deployment.yaml b/charts/rancher-monitoring/v0.0.1/charts/exporter-kube-state/templates/deployment.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c51d485ce7be578e13c267924a90327aab5f3930 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/exporter-kube-state/templates/deployment.yaml @@ -0,0 +1,60 @@ +apiVersion: {{ template "deployment_api_version" . }} +kind: Deployment +metadata: + name: {{ template "app.fullname" . }} + labels: + app: {{ template "app.name" . }} + chart: {{ template "app.version" . }} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} +spec: + replicas: {{ .Values.replicaCount }} + selector: + matchLabels: + app: {{ template "app.name" . }} + chart: {{ template "app.version" . }} + release: {{ .Release.Name }} + template: + metadata: + labels: + app: {{ template "app.name" . }} + chart: {{ template "app.version" . 
}} + release: {{ .Release.Name }} + spec: + {{- if .Values.nodeSelector }} + nodeSelector: +{{ toYaml .Values.nodeSelector | indent 8 }} + {{- end }} + containers: + - name: kube-state + image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" + ports: + - name: http + containerPort: 8080 + protocol: TCP + livenessProbe: + httpGet: + path: / + port: 8080 + initialDelaySeconds: 30 + timeoutSeconds: 30 + readinessProbe: + httpGet: + path: / + port: 8080 + initialDelaySeconds: 30 + timeoutSeconds: 5 + resources: + limits: + cpu: 100m + memory: 200Mi + requests: + cpu: 100m + memory: 130Mi + {{- if .Values.enabledRBAC }} + serviceAccountName: {{ .Values.serviceAccountName }} + {{- end }} +{{- if .Values.tolerations }} + tolerations: +{{ toYaml .Values.tolerations | indent 8 }} + {{- end }} diff --git a/charts/rancher-monitoring/v0.0.1/charts/exporter-kube-state/templates/service.yaml b/charts/rancher-monitoring/v0.0.1/charts/exporter-kube-state/templates/service.yaml new file mode 100644 index 0000000000000000000000000000000000000000..92436028d2973f83075a03f0c9a20e5465f2fb5d --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/exporter-kube-state/templates/service.yaml @@ -0,0 +1,21 @@ +apiVersion: v1 +kind: Service +metadata: + name: expose-kubernetes-metrics + labels: + app: {{ template "app.name" . }} + chart: {{ template "app.version" . }} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + monitoring.cattle.io: "true" +spec: + type: ClusterIP + selector: + app: {{ template "app.name" . }} + chart: {{ template "app.version" . 
}} + release: {{ .Release.Name }} + ports: + - name: metrics + port: {{ .Values.ports.metrics.port }} + targetPort: 8080 + protocol: TCP diff --git a/charts/rancher-monitoring/v0.0.1/charts/exporter-kube-state/templates/servicemonitor.yaml b/charts/rancher-monitoring/v0.0.1/charts/exporter-kube-state/templates/servicemonitor.yaml new file mode 100644 index 0000000000000000000000000000000000000000..04cd5385e85fc8bc46be359f3300797ed6b65c43 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/exporter-kube-state/templates/servicemonitor.yaml @@ -0,0 +1,28 @@ +apiVersion: {{ template "operator_api_version" . }} +kind: ServiceMonitor +metadata: + labels: + app: {{ template "app.name" . }} + chart: {{ template "app.version" . }} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + source: rancher-monitoring + {{- if .Values.serviceMonitor.labels }} +{{ toYaml .Values.serviceMonitor.labels | indent 4 }} + {{- end }} + name: {{ template "app.fullname" . }} +spec: + jobLabel: kube-state + selector: + matchLabels: + app: {{ template "app.name" . }} + chart: {{ template "app.version" . }} + release: {{ .Release.Name }} + monitoring.cattle.io: "true" + namespaceSelector: + matchNames: + - {{ .Release.Namespace | quote }} + endpoints: + - port: metrics + interval: 15s + honorLabels: true diff --git a/charts/rancher-monitoring/v0.0.1/charts/exporter-kube-state/values.yaml b/charts/rancher-monitoring/v0.0.1/charts/exporter-kube-state/values.yaml new file mode 100755 index 0000000000000000000000000000000000000000..7576b2d1a0739fc2c244402314865412d2524bfc --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/exporter-kube-state/values.yaml @@ -0,0 +1,43 @@ +enabledRBAC: true + +## Name of an already existing ServiceAccount +## +serviceAccountName: "" + +## CRD apiGroup +## +apiGroup: "monitoring.coreos.com" + +## Port on which the exporter's metrics Service is exposed +## +ports: + metrics: + port: 8080 + +# Default values for kube-state-metrics.
+# This is a YAML-formatted file. +# Declare variables to be passed into your templates. +replicaCount: 1 + +image: + repository: quay.io/coreos/kube-state-metrics + tag: v1.4.0 + +## Node Selector to constrain pods to run on particular nodes +## Ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/ +## +nodeSelector: {} + +## Tolerations for use with node taints (a list of toleration objects, see the commented example) +## Ref: https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/ +## +tolerations: [] + # - key: "key" + # operator: "Equal" + # value: "value" + # effect: "NoSchedule" + +serviceMonitor: + ## Custom Labels to be added to ServiceMonitor + ## + labels: {} diff --git a/charts/rancher-monitoring/v0.0.1/charts/exporter-kubelets/Chart.yaml b/charts/rancher-monitoring/v0.0.1/charts/exporter-kubelets/Chart.yaml new file mode 100755 index 0000000000000000000000000000000000000000..8d6d734ea5a1f215a0428c1e5ac81327e51ca432 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/exporter-kubelets/Chart.yaml @@ -0,0 +1,9 @@ +apiVersion: v1 +description: Creates ServiceMonitor CRD of kublets for Kubernetes which maintaining by Rancher 2. +engine: gotpl +maintainers: +- name: thxCode + email: frank@rancher.com +name: exporter-kubelets +version: 0.0.1 + diff --git a/charts/rancher-monitoring/v0.0.1/charts/exporter-kubelets/templates/servicemonitor.yaml b/charts/rancher-monitoring/v0.0.1/charts/exporter-kubelets/templates/servicemonitor.yaml new file mode 100644 index 0000000000000000000000000000000000000000..92fac72939f692a2d05d2e1fce86c9b315dcb3a9 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/exporter-kubelets/templates/servicemonitor.yaml @@ -0,0 +1,49 @@ +apiVersion: {{ template "operator_api_version" . }} +kind: ServiceMonitor +metadata: + labels: + app: {{ template "app.name" . }} + chart: {{ template "app.version" .
}} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + source: rancher-monitoring + {{- if .Values.serviceMonitor.labels }} +{{ toYaml .Values.serviceMonitor.labels | indent 4 }} + {{- end }} + name: {{ template "charts.exporter-kubelets.fullname" . }} +spec: + jobLabel: kubelet + selector: + matchLabels: + k8s-app: kubelet + namespaceSelector: + any: true + matchNames: + - "kube-system" + - {{ .Release.Namespace | quote }} + endpoints: + - port: https-metrics + scheme: https + interval: 15s + tlsConfig: + caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + {{- if .Values.insecureSkipVerify }} + insecureSkipVerify: true + {{- end }} + bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + - port: https-metrics + scheme: https + path: /metrics/cadvisor + interval: 30s + honorLabels: true + tlsConfig: + caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + {{- if .Values.insecureSkipVerify }} + insecureSkipVerify: true + {{- end }} + bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + - port: http-metrics + interval: 15s + - port: cadvisor + interval: 30s + honorLabels: true diff --git a/charts/rancher-monitoring/v0.0.1/charts/exporter-kubelets/values.yaml b/charts/rancher-monitoring/v0.0.1/charts/exporter-kubelets/values.yaml new file mode 100755 index 0000000000000000000000000000000000000000..0c62f8d425369211dc3df3954f2250e94907fbf0 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/exporter-kubelets/values.yaml @@ -0,0 +1,13 @@ +## CRD apiGroup +## +apiGroup: "monitoring.coreos.com" + +## Skip verification until we have resolved why the certificate validation +## for the kubelet on API server nodes fail. 
+## +insecureSkipVerify: true + +serviceMonitor: + ## Custom Labels to be added to ServiceMonitor + ## + labels: {} diff --git a/charts/rancher-monitoring/v0.0.1/charts/exporter-kubernetes/Chart.yaml b/charts/rancher-monitoring/v0.0.1/charts/exporter-kubernetes/Chart.yaml new file mode 100755 index 0000000000000000000000000000000000000000..e5f165d43bc8ff7b4875da2867d991ee76ddc756 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/exporter-kubernetes/Chart.yaml @@ -0,0 +1,8 @@ +apiVersion: v1 +description: Creates ServiceMonitor CRD of apiserver for Kubernetes which maintaining by Rancher 2. +engine: gotpl +maintainers: +- name: thxCode + email: frank@rancher.com +name: exporter-kubernetes +version: 0.0.1 diff --git a/charts/rancher-monitoring/v0.0.1/charts/exporter-kubernetes/templates/prometheusrule.yaml b/charts/rancher-monitoring/v0.0.1/charts/exporter-kubernetes/templates/prometheusrule.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d4fb438d63c40245a88cdc6a73a241512a3c3e4a --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/exporter-kubernetes/templates/prometheusrule.yaml @@ -0,0 +1,58 @@ +apiVersion: {{ template "operator_api_version" . }} +kind: PrometheusRule +metadata: + labels: + app: {{ template "app.name" . }} + chart: {{ template "app.version" . }} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + source: rancher-monitoring + {{- if .Values.prometheusRule.labels }} +{{ toYaml .Values.prometheusRule.labels | indent 4 }} + {{- end }} + name: {{ template "app.fullname" . 
}} +spec: + groups: + - name: kubernetes.rules + rules: + - record: pod_name:container_memory_usage_bytes:sum + expr: sum(container_memory_usage_bytes{container_name!="POD",pod_name!=""}) BY + (pod_name) + - record: pod_name:container_spec_cpu_shares:sum + expr: sum(container_spec_cpu_shares{container_name!="POD",pod_name!=""}) BY (pod_name) + - record: pod_name:container_cpu_usage:sum + expr: sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name!=""}[5m])) + BY (pod_name) + - record: pod_name:container_fs_usage_bytes:sum + expr: sum(container_fs_usage_bytes{container_name!="POD",pod_name!=""}) BY (pod_name) + - record: namespace:container_memory_usage_bytes:sum + expr: sum(container_memory_usage_bytes{container_name!=""}) BY (namespace) + - record: namespace:container_spec_cpu_shares:sum + expr: sum(container_spec_cpu_shares{container_name!=""}) BY (namespace) + - record: namespace:container_cpu_usage:sum + expr: sum(rate(container_cpu_usage_seconds_total{container_name!="POD"}[5m])) + BY (namespace) + - record: cluster:memory_usage:ratio + expr: sum(container_memory_usage_bytes{container_name!="POD",pod_name!=""}) BY + (cluster) / sum(machine_memory_bytes) BY (cluster) + - record: cluster:container_spec_cpu_shares:ratio + expr: sum(container_spec_cpu_shares{container_name!="POD",pod_name!=""}) / 1000 + / sum(machine_cpu_cores) + - record: cluster:container_cpu_usage:ratio + expr: sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name!=""}[5m])) + / sum(machine_cpu_cores) + - record: apiserver_latency_seconds:quantile + expr: histogram_quantile(0.99, rate(apiserver_request_latencies_bucket[5m])) / + 1e+06 + labels: + quantile: "0.99" + - record: apiserver_latency:quantile_seconds + expr: histogram_quantile(0.9, rate(apiserver_request_latencies_bucket[5m])) / + 1e+06 + labels: + quantile: "0.9" + - record: apiserver_latency_seconds:quantile + expr: histogram_quantile(0.5, rate(apiserver_request_latencies_bucket[5m])) / + 1e+06 
+ labels: + quantile: "0.5" diff --git a/charts/rancher-monitoring/v0.0.1/charts/exporter-kubernetes/templates/servicemonitor.yaml b/charts/rancher-monitoring/v0.0.1/charts/exporter-kubernetes/templates/servicemonitor.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8d6459b2ca840180d4b42600f2cc46aff7497102 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/exporter-kubernetes/templates/servicemonitor.yaml @@ -0,0 +1,32 @@ +apiVersion: {{ template "operator_api_version" . }} +kind: ServiceMonitor +metadata: + labels: + app: {{ template "app.name" . }} + chart: {{ template "app.version" . }} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + source: rancher-monitoring + {{- if .Values.serviceMonitor.labels }} +{{ toYaml .Values.serviceMonitor.labels | indent 4 }} + {{- end }} + name: {{ template "app.fullname" . }} +spec: + jobLabel: kubernetes + selector: + matchLabels: + component: apiserver + provider: kubernetes + namespaceSelector: + matchNames: + - "default" + endpoints: + - port: https + interval: 15s + scheme: https + tlsConfig: + caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + {{- if .Values.insecureSkipVerify }} + insecureSkipVerify: true + {{- end }} + bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token diff --git a/charts/rancher-monitoring/v0.0.1/charts/exporter-kubernetes/values.yaml b/charts/rancher-monitoring/v0.0.1/charts/exporter-kubernetes/values.yaml new file mode 100755 index 0000000000000000000000000000000000000000..ad3385719224b11aee9a14a6d890d12e447ade78 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/exporter-kubernetes/values.yaml @@ -0,0 +1,18 @@ +## CRD apiGroup +## +apiGroup: "monitoring.coreos.com" + +## Skip verification until we have resolved why the certificate validation +## for the kubelet on API server nodes fail. 
+## +insecureSkipVerify: true + +serviceMonitor: + ## Custom Labels to be added to ServiceMonitor + ## + labels: {} + +prometheusRule: + ## Custom Labels to be added to PrometheusRule + ## + labels: {} \ No newline at end of file diff --git a/charts/rancher-monitoring/v0.0.1/charts/exporter-node/Chart.yaml b/charts/rancher-monitoring/v0.0.1/charts/exporter-node/Chart.yaml new file mode 100755 index 0000000000000000000000000000000000000000..1d87e0213539866ee21f1fa15c7810f279559530 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/exporter-node/Chart.yaml @@ -0,0 +1,8 @@ +apiVersion: v1 +description: Creates Node Exporter instance for Kubernetes which maintaining by Rancher 2. +engine: gotpl +maintainers: +- name: thxCode + email: frank@rancher.com +name: exporter-node +version: 0.0.1 \ No newline at end of file diff --git a/charts/rancher-monitoring/v0.0.1/charts/exporter-node/templates/daemonset.yaml b/charts/rancher-monitoring/v0.0.1/charts/exporter-node/templates/daemonset.yaml new file mode 100755 index 0000000000000000000000000000000000000000..e131f09fdf2ff051c6ace2a2261d1ee830491566 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/exporter-node/templates/daemonset.yaml @@ -0,0 +1,65 @@ +apiVersion: {{ template "daemonset_api_version" . }} +kind: DaemonSet +metadata: + labels: + app: {{ template "app.name" . }} + chart: {{ template "app.version" . }} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + name: {{ template "app.fullname" . }} +spec: + selector: + matchLabels: + app: {{ template "app.name" . }} + chart: {{ template "app.version" . }} + release: {{ .Release.Name }} + template: + metadata: + labels: + app: {{ template "app.name" . }} + chart: {{ template "app.version" . 
}} + release: {{ .Release.Name }} + spec: + tolerations: # single key: the values-supplied list when set, otherwise the chart's built-in defaults + {{- if .Values.tolerations }} +{{ toYaml .Values.tolerations | indent 8 }} + {{- else }} + - operator: "Exists" + - key: "node-role.kubernetes.io/master" + operator: "Exists" + - key: "node-role.kubernetes.io/etcd" + operator: "Exists" + - key: "node-role.kubernetes.io/controlplane" + operator: "Exists" + {{- end }} + containers: + - name: exporter-node + image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" + args: + - --web.listen-address=0.0.0.0:{{ .Values.ports.metrics.port }} + {{- if and .Values.container .Values.container.args }} +{{ toYaml .Values.container.args | indent 10 }} + {{- end }} + ports: + - name: http + containerPort: {{ .Values.ports.metrics.port }} + hostPort: {{ .Values.ports.metrics.port }} + resources: +{{ toYaml .Values.resources | indent 12 }} + {{- if and .Values.container .Values.container.volumeMounts }} + volumeMounts: +{{ toYaml .Values.container.volumeMounts | indent 10 }} + {{- end }} + {{- if .Values.enabledRBAC }} + serviceAccountName: {{ .Values.serviceAccountName }} + {{- end }} + {{- if .Values.nodeSelector }} + nodeSelector: +{{ toYaml .Values.nodeSelector | indent 8 }} + {{- end }} + hostNetwork: true + hostPID: true + {{- if and .Values.container .Values.container.volumes }} + volumes: +{{ toYaml .Values.container.volumes | indent 6 }} + {{- end}} diff --git a/charts/rancher-monitoring/v0.0.1/charts/exporter-node/templates/prometheusrule.yaml b/charts/rancher-monitoring/v0.0.1/charts/exporter-node/templates/prometheusrule.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2eb7635edfb4befd74146f5feb985cbe959c4f3f --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/exporter-node/templates/prometheusrule.yaml @@ -0,0 +1,34 @@ +apiVersion: {{ template "operator_api_version" . }} +kind: PrometheusRule +metadata: + labels: + app: {{ template "app.name" . }} + chart: {{ template "app.version" .
}} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + source: rancher-monitoring + {{- if .Values.prometheusRule.labels }} +{{ toYaml .Values.prometheusRule.labels | indent 4 }} + {{- end }} + name: {{ template "app.fullname" . }} +spec: + groups: + - name: node.rules # source metrics use the node_exporter >= 0.16 names (*_seconds_total, *_bytes, *_bytes_total) + rules: + - record: instance:node_cpu:rate:sum + expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[3m])) + BY (instance) + - record: instance:node_filesystem_usage:sum + expr: sum((node_filesystem_size_bytes{mountpoint="/"} - node_filesystem_free_bytes{mountpoint="/"})) + BY (instance) + - record: instance:node_network_receive_bytes:rate:sum + expr: sum(rate(node_network_receive_bytes_total[3m])) BY (instance) + - record: instance:node_network_transmit_bytes:rate:sum + expr: sum(rate(node_network_transmit_bytes_total[3m])) BY (instance) + - record: instance:node_cpu:ratio + expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[5m])) WITHOUT (cpu, mode) / ON(instance) + GROUP_LEFT() count(sum(node_cpu_seconds_total) BY (instance, cpu)) BY (instance) + - record: cluster:node_cpu:sum_rate5m + expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[5m])) + - record: cluster:node_cpu:ratio # cluster:node_cpu:sum_rate5m (recorded just above) over the count of (instance, cpu) pairs + expr: cluster:node_cpu:sum_rate5m / count(sum(node_cpu_seconds_total) BY (instance, cpu)) diff --git a/charts/rancher-monitoring/v0.0.1/charts/exporter-node/templates/service.yaml b/charts/rancher-monitoring/v0.0.1/charts/exporter-node/templates/service.yaml new file mode 100755 index 0000000000000000000000000000000000000000..d3047653b3bce3a6008132b132df6c938eab01aa --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/exporter-node/templates/service.yaml @@ -0,0 +1,21 @@ +apiVersion: v1 +kind: Service +metadata: + name: expose-node-metrics + labels: + app: {{ template "app.name" . }} + chart: {{ template "app.version" . }} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + monitoring.cattle.io: "true" +spec: + type: ClusterIP + selector: + app: {{ template "app.name" . }} + chart: {{ template "app.version" .
}} + release: {{ .Release.Name }} + ports: + - name: metrics + port: {{ .Values.ports.metrics.port }} + targetPort: {{ .Values.ports.metrics.port }} + protocol: TCP diff --git a/charts/rancher-monitoring/v0.0.1/charts/exporter-node/templates/servicemonitor.yaml b/charts/rancher-monitoring/v0.0.1/charts/exporter-node/templates/servicemonitor.yaml new file mode 100644 index 0000000000000000000000000000000000000000..97765cb8cb07d3d5c7c08d711fc72023d9d92597 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/exporter-node/templates/servicemonitor.yaml @@ -0,0 +1,27 @@ +apiVersion: {{ template "operator_api_version" . }} +kind: ServiceMonitor +metadata: + labels: + app: {{ template "app.name" . }} + chart: {{ template "app.version" . }} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + source: rancher-monitoring + {{- if .Values.serviceMonitor.labels }} +{{ toYaml .Values.serviceMonitor.labels | indent 4 }} + {{- end }} + name: {{ template "app.fullname" . }} +spec: + jobLabel: node + selector: + matchLabels: + app: {{ template "app.name" . }} + chart: {{ template "app.version" . }} + release: {{ .Release.Name }} + monitoring.cattle.io: "true" + namespaceSelector: + matchNames: + - {{ .Release.Namespace | quote }} + endpoints: + - port: metrics + interval: 15s diff --git a/charts/rancher-monitoring/v0.0.1/charts/exporter-node/values.yaml b/charts/rancher-monitoring/v0.0.1/charts/exporter-node/values.yaml new file mode 100755 index 0000000000000000000000000000000000000000..1809f5a9244b4b8b73016eee94968c12c71adc79 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/exporter-node/values.yaml @@ -0,0 +1,75 @@ +enabledRBAC: true + +## Name of an already existing ServiceAccount +## +serviceAccountName: "" + +## CRD apiGroup +## +apiGroup: "monitoring.coreos.com" + +## Port the exporter listens on and exposes on the host +## +ports: + metrics: + port: 9100 + +# This is a YAML-formatted file. +# Declare variables to be passed into your templates.
+replicaCount: 1 + +image: + repository: quay.io/prometheus/node-exporter + tag: v0.16.0 + +resources: + limits: + cpu: 200m + memory: 50Mi + requests: + cpu: 100m + memory: 30Mi + +container: + args: + - --path.procfs=/host/proc + - --path.sysfs=/host/sys + + volumes: + - name: proc + hostPath: + path: /proc + - name: sys + hostPath: + path: /sys + + volumeMounts: + - name: proc + mountPath: /host/proc + readOnly: true + - name: sys + mountPath: /host/sys + readOnly: true + +## Tolerations for use with node taints +## Ref: https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/ +## +tolerations: +- effect: NoSchedule + operator: Exists + +## Node Selector to constrain pods to run on particular nodes +## Ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/ +## +nodeSelector: {} + +serviceMonitor: + ## Custom Labels to be added to ServiceMonitor + ## + labels: {} + +prometheusRule: + ## Custom Labels to be added to PrometheusRule + ## + labels: {} + diff --git a/charts/rancher-monitoring/v0.0.1/charts/grafana/Chart.yaml b/charts/rancher-monitoring/v0.0.1/charts/grafana/Chart.yaml new file mode 100755 index 0000000000000000000000000000000000000000..c007dfbe627e70889bea143f6796f6f99534997e --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/grafana/Chart.yaml @@ -0,0 +1,8 @@ +apiVersion: v1 +description: Creates Grafana instance for Kubernetes which maintaining by Rancher 2. 
+engine: gotpl +maintainers: +- name: thxCode + email: frank@rancher.com +name: grafana +version: 0.0.1 diff --git a/charts/rancher-monitoring/v0.0.1/charts/grafana/dashboards/c_cluster-dashboard.json b/charts/rancher-monitoring/v0.0.1/charts/grafana/dashboards/c_cluster-dashboard.json new file mode 100644 index 0000000000000000000000000000000000000000..37736a784999bf4500d936cc7ea6bc5d27dde035 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/grafana/dashboards/c_cluster-dashboard.json @@ -0,0 +1 @@ +{"dashboard":{"__inputs":[{"name":"DS_PROMETHEUS","label":"Rancher-Monitoring","description":"","type":"datasource","pluginId":"prometheus","pluginName":"Prometheus"}],"__requires":[{"type":"grafana","id":"grafana","name":"Grafana","version":"5.3.0"},{"type":"panel","id":"graph","name":"Graph","version":"5.0.0"},{"type":"datasource","id":"prometheus","name":"Prometheus","version":"5.0.0"},{"type":"panel","id":"singlestat","name":"Singlestat","version":"5.0.0"},{"type":"panel","id":"table","name":"Table","version":"5.0.0"}],"annotations":{"list":[{"builtIn":1,"datasource":"-- Grafana --","enable":true,"hide":true,"iconColor":"rgba(0, 211, 255, 1)","name":"Annotations & Alerts","type":"dashboard"}]},"description":"Monitors Kubernetes cluster using Prometheus. Shows overall cluster CPU / Memory / Filesystem usage as well as individual pod, containers, systemd services statistics. 
Uses cAdvisor metrics only.","editable":true,"gnetId":1621,"graphTooltip":0,"id":null,"iteration":1543396027075,"links":[],"panels":[{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":0},"id":34,"panels":[],"title":"Total usage","type":"row"},{"cacheTimeout":null,"colorBackground":false,"colorValue":true,"colors":["rgba(50, 172, 45, 0.97)","rgba(237, 129, 40, 0.89)","rgba(245, 54, 54, 0.9)"],"datasource":"${DS_PROMETHEUS}","decimals":2,"editable":true,"error":false,"format":"percent","gauge":{"maxValue":100,"minValue":0,"show":true,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":5,"w":8,"x":0,"y":1},"height":"180px","id":6,"interval":null,"isNew":true,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"sum (rate (container_cpu_usage_seconds_total{id=\"/\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) / sum (machine_cpu_cores{kubernetes_io_hostname=~\"^$Node$\"}) * 100","format":"time_series","interval":"10s","intervalFactor":1,"refId":"A","step":10}],"thresholds":"65, 90","title":"Cluster CPU usage (2m avg)","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":true,"colors":["rgba(50, 172, 45, 0.97)","rgba(237, 129, 40, 0.89)","rgba(245, 54, 54, 
0.9)"],"datasource":"${DS_PROMETHEUS}","editable":true,"error":false,"format":"percent","gauge":{"maxValue":100,"minValue":0,"show":true,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":5,"w":8,"x":8,"y":1},"height":"180px","id":4,"interval":null,"isNew":true,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"sum (container_memory_working_set_bytes{id=\"/\",kubernetes_io_hostname=~\"^$Node$\"}) / sum (machine_memory_bytes{kubernetes_io_hostname=~\"^$Node$\"}) * 100","format":"time_series","interval":"10s","intervalFactor":1,"refId":"A","step":10}],"thresholds":"65, 90","title":"Cluster memory usage","transparent":false,"type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":true,"colors":["rgba(50, 172, 45, 0.97)","rgba(237, 129, 40, 0.89)","rgba(245, 54, 54, 0.9)"],"datasource":"${DS_PROMETHEUS}","decimals":2,"editable":true,"error":false,"format":"percent","gauge":{"maxValue":100,"minValue":0,"show":true,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":5,"w":8,"x":16,"y":1},"height":"180px","id":7,"interval":null,"isNew":true,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 
193)","show":false},"tableColumn":"","targets":[{"expr":"sum (container_fs_usage_bytes{device=~\"^/dev/.*$\",id=\"/\",kubernetes_io_hostname=~\"^$Node$\"}) / sum (container_fs_limit_bytes{device=~\"^/dev/.*$\",id=\"/\",kubernetes_io_hostname=~\"^$Node$\"}) * 100","format":"time_series","interval":"10s","intervalFactor":1,"legendFormat":"","metric":"","refId":"A","step":10}],"thresholds":"65, 90","title":"Cluster filesystem usage","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(50, 172, 45, 0.97)","rgba(237, 129, 40, 0.89)","rgba(245, 54, 54, 0.9)"],"datasource":"${DS_PROMETHEUS}","decimals":2,"editable":true,"error":false,"format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":3,"w":4,"x":0,"y":6},"height":"1px","id":11,"interval":null,"isNew":true,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":" cores","postfixFontSize":"30%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"sum (rate (container_cpu_usage_seconds_total{id=\"/\",kubernetes_io_hostname=~\"^$Node$\"}[2m]))","interval":"10s","intervalFactor":1,"refId":"A","step":10}],"thresholds":"","title":"Used","type":"singlestat","valueFontSize":"50%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(50, 172, 45, 0.97)","rgba(237, 129, 40, 0.89)","rgba(245, 54, 54, 
0.9)"],"datasource":"${DS_PROMETHEUS}","decimals":2,"editable":true,"error":false,"format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":3,"w":4,"x":4,"y":6},"height":"1px","id":12,"interval":null,"isNew":true,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":" cores","postfixFontSize":"30%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"sum (machine_cpu_cores{kubernetes_io_hostname=~\"^$Node$\"})","interval":"10s","intervalFactor":1,"refId":"A","step":10}],"thresholds":"","title":"Total","type":"singlestat","valueFontSize":"50%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(50, 172, 45, 0.97)","rgba(237, 129, 40, 0.89)","rgba(245, 54, 54, 0.9)"],"datasource":"${DS_PROMETHEUS}","decimals":2,"editable":true,"error":false,"format":"bytes","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":3,"w":4,"x":8,"y":6},"height":"1px","id":9,"interval":null,"isNew":true,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"20%","prefix":"","prefixFontSize":"20%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"sum 
(container_memory_working_set_bytes{id=\"/\",kubernetes_io_hostname=~\"^$Node$\"})","interval":"10s","intervalFactor":1,"refId":"A","step":10}],"thresholds":"","title":"Used","type":"singlestat","valueFontSize":"50%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(50, 172, 45, 0.97)","rgba(237, 129, 40, 0.89)","rgba(245, 54, 54, 0.9)"],"datasource":"${DS_PROMETHEUS}","decimals":2,"editable":true,"error":false,"format":"bytes","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":3,"w":4,"x":12,"y":6},"height":"1px","id":10,"interval":null,"isNew":true,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"sum (machine_memory_bytes{kubernetes_io_hostname=~\"^$Node$\"})","interval":"10s","intervalFactor":1,"refId":"A","step":10}],"thresholds":"","title":"Total","type":"singlestat","valueFontSize":"50%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(50, 172, 45, 0.97)","rgba(237, 129, 40, 0.89)","rgba(245, 54, 54, 0.9)"],"datasource":"${DS_PROMETHEUS}","decimals":2,"editable":true,"error":false,"format":"bytes","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":3,"w":4,"x":16,"y":6},"height":"1px","id":13,"interval":null,"isNew":true,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to 
text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"sum (container_fs_usage_bytes{device=~\"^/dev/.*$\",id=\"/\",kubernetes_io_hostname=~\"^$Node$\"})","interval":"10s","intervalFactor":1,"refId":"A","step":10}],"thresholds":"","title":"Used","type":"singlestat","valueFontSize":"50%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(50, 172, 45, 0.97)","rgba(237, 129, 40, 0.89)","rgba(245, 54, 54, 0.9)"],"datasource":"${DS_PROMETHEUS}","decimals":2,"editable":true,"error":false,"format":"bytes","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":3,"w":4,"x":20,"y":6},"height":"1px","id":14,"interval":null,"isNew":true,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"sum 
(container_fs_limit_bytes{device=~\"^/dev/.*$\",id=\"/\",kubernetes_io_hostname=~\"^$Node$\"})","interval":"10s","intervalFactor":1,"refId":"A","step":10}],"thresholds":"","title":"Total","type":"singlestat","valueFontSize":"50%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":1,"gridPos":{"h":6,"w":8,"x":0,"y":9},"id":2051,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum (rate (container_cpu_usage_seconds_total{id=\"/\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) / sum (machine_cpu_cores{kubernetes_io_hostname=~\"^$Node$\"}) * 100","format":"time_series","hide":false,"instant":false,"intervalFactor":1,"legendFormat":"Cluster","refId":"A"},{"expr":"sum (rate (container_cpu_usage_seconds_total{id=\"/\"}[2m])) by (kubernetes_io_hostname) / sum (machine_cpu_cores) by (kubernetes_io_hostname) * 100","format":"time_series","hide":false,"intervalFactor":1,"legendFormat":"{{kubernetes_io_hostname}}","refId":"B"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Cluster CPU usage (2m 
avg)","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"percent","label":"","logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":"","logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":1,"gridPos":{"h":6,"w":8,"x":8,"y":9},"id":2052,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum (container_memory_working_set_bytes{id=\"/\",kubernetes_io_hostname=~\"^$Node$\"}) / sum (machine_memory_bytes{kubernetes_io_hostname=~\"^$Node$\"}) * 100","format":"time_series","hide":false,"instant":false,"intervalFactor":1,"legendFormat":"Cluster","refId":"A"},{"expr":"sum (container_memory_working_set_bytes{id=\"/\"}) by (kubernetes_io_hostname) / sum (machine_memory_bytes) by (kubernetes_io_hostname) * 100","format":"time_series","hide":false,"intervalFactor":1,"legendFormat":"{{kubernetes_io_hostname}}","refId":"B"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Cluster memory 
usage","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"percent","label":"","logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":"","logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":1,"gridPos":{"h":6,"w":8,"x":16,"y":9},"id":2053,"legend":{"alignAsTable":false,"avg":false,"current":false,"max":false,"min":false,"rightSide":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum (container_fs_usage_bytes{device=~\"^/dev/.*$\",id=\"/\",kubernetes_io_hostname=~\"^$Node$\"}) / sum (container_fs_limit_bytes{device=~\"^/dev/.*$\",id=\"/\",kubernetes_io_hostname=~\"^$Node$\"}) * 100","format":"time_series","hide":false,"instant":false,"intervalFactor":1,"legendFormat":"Cluster","refId":"A"},{"expr":"sum (container_fs_usage_bytes{device=~\"^/dev/.*$\",id=\"/\"}) by (kubernetes_io_hostname) / sum (container_fs_limit_bytes{device=~\"^/dev/.*$\",id=\"/\"}) by (kubernetes_io_hostname) * 100","format":"time_series","hide":false,"intervalFactor":1,"legendFormat":"{{kubernetes_io_hostname}}","refId":"B"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Cluster filesystem 
usage","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"decimals":null,"format":"percent","label":"","logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":"","logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":15},"id":2022,"panels":[],"title":"Node","type":"row"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["#299c46","rgba(237, 129, 40, 0.89)","#d44a3a"],"datasource":"${DS_PROMETHEUS}","format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":3,"w":8,"x":0,"y":16},"id":2024,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"sum(kube_node_info{node=~\"$Node\"})","format":"time_series","intervalFactor":1,"refId":"A"}],"thresholds":"","title":"Number Of Nodes","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"avg"},{"cacheTimeout":null,"colorBackground":true,"colorValue":false,"colors":["#299c46","rgba(237, 129, 40, 0.89)","#d44a3a"],"datasource":"${DS_PROMETHEUS}","format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":3,"w":8,"x":8,"y":16},"id":2025,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to 
text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"sum(kube_node_status_condition{condition=\"OutOfDisk\", node=~\"$Node\", status=\"true\"})","format":"time_series","intervalFactor":1,"refId":"A"}],"thresholds":"1","title":"Nodes Out of Disk","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":true,"colorValue":false,"colors":["#299c46","rgba(237, 129, 40, 0.89)","#d44a3a"],"datasource":"${DS_PROMETHEUS}","format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":3,"w":8,"x":16,"y":16},"id":2026,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"sum(kube_node_spec_unschedulable{node=~\"$Node\"})","format":"time_series","intervalFactor":1,"refId":"A"}],"thresholds":"1","title":"Nodes 
Unavailable","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":19},"id":2014,"panels":[],"title":"Deployments","type":"row"},{"columns":[{"text":"Current","value":"current"}],"datasource":"${DS_PROMETHEUS}","fontSize":"100%","gridPos":{"h":5,"w":6,"x":0,"y":20},"id":2016,"links":[],"pageSize":null,"scroll":true,"showHeader":true,"sort":{"col":1,"desc":true},"styles":[{"alias":"Time","dateFormat":"YYYY-MM-DD HH:mm:ss","pattern":"Time","type":"date"},{"alias":"","colorMode":"row","colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"decimals":0,"pattern":"Metric","thresholds":["0","0",".9"],"type":"string","unit":"none"},{"alias":"","colorMode":"row","colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":0,"link":false,"pattern":"Value","thresholds":["0","1"],"type":"number","unit":"none"}],"targets":[{"expr":"kube_deployment_status_replicas{namespace=~\".*\"}","format":"time_series","instant":true,"interval":"","intervalFactor":1,"legendFormat":"{{ deployment }}","refId":"A"}],"title":"Deployment Replicas - Up To Date","transform":"timeseries_to_rows","type":"table"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["#299c46","rgba(237, 129, 40, 0.89)","#d44a3a"],"datasource":"${DS_PROMETHEUS}","format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":5,"w":6,"x":6,"y":20},"id":2018,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 
189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"sum(kube_deployment_status_replicas{namespace=~\".*\"})","format":"time_series","intervalFactor":1,"refId":"A"}],"thresholds":"","title":"Deployment Replicas","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"avg"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["#299c46","rgba(237, 129, 40, 0.89)","#d44a3a"],"datasource":"${DS_PROMETHEUS}","format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":5,"w":6,"x":12,"y":20},"id":2019,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"sum(kube_deployment_status_replicas_updated{namespace=~\".*\"})","format":"time_series","intervalFactor":1,"refId":"A"}],"thresholds":"","title":"Deployment Replicas - Updated","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"avg"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["#299c46","rgba(237, 129, 40, 0.89)","#d44a3a"],"datasource":"${DS_PROMETHEUS}","format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":5,"w":6,"x":18,"y":20},"id":2020,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to 
text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"sum(kube_deployment_status_replicas_unavailable{namespace=~\".*\"})","format":"time_series","intervalFactor":1,"refId":"A"}],"thresholds":"","title":"Deployment Replicas - Unavailable","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"avg"},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":25},"id":2045,"panels":[],"title":"Jobs","type":"row"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["#299c46","rgba(237, 129, 40, 0.89)","#d44a3a"],"datasource":"${DS_PROMETHEUS}","format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":3,"w":8,"x":0,"y":26},"id":2047,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":true},"tableColumn":"","targets":[{"expr":"sum(kube_job_status_succeeded{namespace=~\".*\"})","format":"time_series","intervalFactor":1,"refId":"A"}],"thresholds":"","title":"Jobs Succeeded","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["#299c46","rgba(237, 129, 40, 
0.89)","#d44a3a"],"datasource":"${DS_PROMETHEUS}","format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":3,"w":8,"x":8,"y":26},"id":2048,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":true},"tableColumn":"","targets":[{"expr":"sum(kube_job_status_active{namespace=~\".*\"})","format":"time_series","intervalFactor":1,"refId":"A"}],"thresholds":"","title":"Jobs Active","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["#299c46","rgba(237, 129, 40, 0.89)","#d44a3a"],"datasource":"${DS_PROMETHEUS}","format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":3,"w":8,"x":16,"y":26},"id":2049,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":true},"tableColumn":"","targets":[{"expr":"sum(kube_job_status_failed{namespace=~\".*\"})","format":"time_series","intervalFactor":1,"refId":"A"}],"thresholds":"","title":"Jobs 
Failed","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":29},"id":2028,"panels":[],"title":"Pods","type":"row"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["#299c46","rgba(237, 129, 40, 0.89)","#d44a3a"],"datasource":"${DS_PROMETHEUS}","format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":3,"w":12,"x":0,"y":30},"id":2030,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(78, 203, 42, 0.28)","full":false,"lineColor":"#629e51","show":true},"tableColumn":"","targets":[{"expr":"sum(kube_pod_status_phase{namespace=~\".*\", phase=\"Running\"})","format":"time_series","interval":"","intervalFactor":1,"refId":"A"}],"thresholds":"","title":"Pods Running","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["#299c46","rgba(237, 129, 40, 0.89)","#d44a3a"],"datasource":"${DS_PROMETHEUS}","format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":3,"w":12,"x":12,"y":30},"id":2031,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(78, 203, 42, 
0.28)","full":false,"lineColor":"#629e51","show":true},"tableColumn":"","targets":[{"expr":"sum(kube_pod_status_phase{namespace=~\".*\", phase=\"Pending\"})","format":"time_series","interval":"","intervalFactor":1,"refId":"A"}],"thresholds":"","title":"Pods Pending","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["#299c46","rgba(237, 129, 40, 0.89)","#d44a3a"],"datasource":"${DS_PROMETHEUS}","format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":3,"w":8,"x":0,"y":33},"id":2032,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(78, 203, 42, 0.28)","full":false,"lineColor":"#629e51","show":true},"tableColumn":"","targets":[{"expr":"sum(kube_pod_status_phase{namespace=~\".*\", phase=\"Failed\"})","format":"time_series","interval":"","intervalFactor":1,"refId":"A"}],"thresholds":"","title":"Pods Failed","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["#299c46","rgba(237, 129, 40, 0.89)","#d44a3a"],"datasource":"${DS_PROMETHEUS}","format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":3,"w":8,"x":8,"y":33},"id":2033,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to 
text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(78, 203, 42, 0.28)","full":false,"lineColor":"#629e51","show":true},"tableColumn":"","targets":[{"expr":"sum(kube_pod_status_phase{namespace=~\".*\", phase=\"Succeeded\"})","format":"time_series","interval":"","intervalFactor":1,"refId":"A"}],"thresholds":"","title":"Pods Succeeded","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["#299c46","rgba(237, 129, 40, 0.89)","#d44a3a"],"datasource":"${DS_PROMETHEUS}","format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":3,"w":8,"x":16,"y":33},"id":2034,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(78, 203, 42, 0.28)","full":false,"lineColor":"#629e51","show":true},"tableColumn":"","targets":[{"expr":"sum(kube_pod_status_phase{namespace=~\".*\", phase=\"Unknown\"})","format":"time_series","interval":"","intervalFactor":1,"refId":"A"}],"thresholds":"","title":"Pods Unknown","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":36},"id":2036,"panels":[],"title":"Containers","type":"row"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["#299c46","rgba(237, 129, 40, 
0.89)","#d44a3a"],"datasource":"${DS_PROMETHEUS}","format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":3,"w":6,"x":0,"y":37},"id":2038,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":true},"tableColumn":"","targets":[{"expr":"sum(kube_pod_container_status_running{namespace=~\".*\"})","format":"time_series","intervalFactor":1,"refId":"A"}],"thresholds":"","title":"Containers Running","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["#299c46","rgba(237, 129, 40, 0.89)","#d44a3a"],"datasource":"${DS_PROMETHEUS}","format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":3,"w":6,"x":6,"y":37},"id":2039,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":true},"tableColumn":"","targets":[{"expr":"sum(kube_pod_container_status_waiting{namespace=~\".*\"})","format":"time_series","intervalFactor":1,"refId":"A"}],"thresholds":"","title":"Containers 
Waiting","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["#299c46","rgba(237, 129, 40, 0.89)","#d44a3a"],"datasource":"${DS_PROMETHEUS}","format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":3,"w":6,"x":12,"y":37},"id":2040,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":true},"tableColumn":"","targets":[{"expr":"sum(kube_pod_container_status_terminated{namespace=~\".*\"})","format":"time_series","intervalFactor":1,"refId":"A"}],"thresholds":"","title":"Containers Terminated","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["#299c46","rgba(237, 129, 40, 0.89)","#d44a3a"],"datasource":"${DS_PROMETHEUS}","format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":3,"w":6,"x":18,"y":37},"id":2041,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 
193)","show":true},"tableColumn":"","targets":[{"expr":"sum(delta(kube_pod_container_status_restarts{namespace=\"kube-system\"}[30m]))","format":"time_series","intervalFactor":1,"refId":"A"}],"thresholds":"","title":"Containers Restarts (Last 30 Minutes)","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["#299c46","rgba(237, 129, 40, 0.89)","#d44a3a"],"datasource":"${DS_PROMETHEUS}","format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":3,"w":12,"x":0,"y":40},"id":2043,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":true},"tableColumn":"","targets":[{"expr":"sum(kube_pod_container_resource_requests_cpu_cores{namespace=~\".*\", node=~\"$Node\"})","format":"time_series","intervalFactor":1,"refId":"A"}],"thresholds":"","title":"CPU Cores Requested by Containers","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["#299c46","rgba(237, 129, 40, 0.89)","#d44a3a"],"datasource":"${DS_PROMETHEUS}","format":"decbytes","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":3,"w":12,"x":12,"y":40},"id":2042,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to 
text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":true},"tableColumn":"","targets":[{"expr":"sum(kube_pod_container_resource_requests_memory_bytes{namespace=~\".*\", node=~\"$Node\"})","format":"time_series","intervalFactor":1,"refId":"A"}],"thresholds":"","title":"Memory Requested By Containers","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":43},"id":33,"panels":[],"title":"Network I/O pressure","type":"row"},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"editable":true,"error":false,"fill":1,"grid":{},"gridPos":{"h":5,"w":24,"x":0,"y":44},"height":"200px","id":32,"isNew":true,"legend":{"alignAsTable":false,"avg":true,"current":true,"max":false,"min":false,"rightSide":false,"show":false,"sideWidth":200,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum (rate (container_network_receive_bytes_total{kubernetes_io_hostname=~\"^$Node$\"}[2m]))","format":"time_series","interval":"10s","intervalFactor":1,"legendFormat":"Received","metric":"network","refId":"A","step":10},{"expr":"- sum (rate (container_network_transmit_bytes_total{kubernetes_io_hostname=~\"^$Node$\"}[2m]))","format":"time_series","interval":"10s","intervalFactor":1,"legendFormat":"Sent","metric":"network","refId":"B","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Network I/O 
pressure","tooltip":{"msResolution":false,"shared":true,"sort":0,"value_type":"cumulative"},"transparent":false,"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"Bps","label":null,"logBase":1,"max":null,"min":null,"show":false}],"yaxis":{"align":false,"alignLevel":null}},{"collapsed":true,"gridPos":{"h":1,"w":24,"x":0,"y":49},"id":35,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":3,"editable":true,"error":false,"fill":0,"grid":{},"gridPos":{"h":7,"w":24,"x":0,"y":44},"height":"","id":17,"isNew":true,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":false,"min":false,"rightSide":true,"show":true,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":true,"targets":[{"expr":"sum (rate (container_cpu_usage_seconds_total{image!=\"\",name=~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (pod_name)","interval":"10s","intervalFactor":1,"legendFormat":"{{ pod_name }}","metric":"container_cpu","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Pods CPU usage (2m avg)","tooltip":{"msResolution":true,"shared":true,"sort":2,"value_type":"cumulative"},"transparent":false,"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"none","label":"cores","logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"title":"Pods CPU 
usage","type":"row"},{"collapsed":true,"gridPos":{"h":1,"w":24,"x":0,"y":50},"id":36,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":3,"editable":true,"error":false,"fill":0,"grid":{},"gridPos":{"h":7,"w":24,"x":0,"y":45},"height":"","id":23,"isNew":true,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":false,"min":false,"rightSide":true,"show":true,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":true,"targets":[{"expr":"sum (rate (container_cpu_usage_seconds_total{systemd_service_name!=\"\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (systemd_service_name)","hide":false,"interval":"10s","intervalFactor":1,"legendFormat":"{{ systemd_service_name }}","metric":"container_cpu","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"System services CPU usage (2m avg)","tooltip":{"msResolution":true,"shared":true,"sort":2,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"none","label":"cores","logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"title":"System services CPU 
usage","type":"row"},{"collapsed":true,"gridPos":{"h":1,"w":24,"x":0,"y":51},"id":37,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":3,"editable":true,"error":false,"fill":0,"grid":{},"gridPos":{"h":7,"w":24,"x":0,"y":10},"height":"","id":24,"isNew":true,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideEmpty":false,"hideZero":false,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":true,"targets":[{"expr":"sum (rate (container_cpu_usage_seconds_total{image!=\"\",name=~\"^k8s_.*\",container_name!=\"POD\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (container_name, pod_name)","hide":false,"interval":"10s","intervalFactor":1,"legendFormat":"pod: {{ pod_name }} | {{ container_name }}","metric":"container_cpu","refId":"A","step":10},{"expr":"sum (rate (container_cpu_usage_seconds_total{image!=\"\",name!~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (kubernetes_io_hostname, name, image)","hide":false,"interval":"10s","intervalFactor":1,"legendFormat":"docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name }})","metric":"container_cpu","refId":"B","step":10},{"expr":"sum (rate (container_cpu_usage_seconds_total{rkt_container_name!=\"\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (kubernetes_io_hostname, rkt_container_name)","interval":"10s","intervalFactor":1,"legendFormat":"rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name }}","metric":"container_cpu","refId":"C","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Containers CPU usage (2m 
avg)","tooltip":{"msResolution":true,"shared":true,"sort":2,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"none","label":"cores","logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"title":"Containers CPU usage","type":"row"},{"collapsed":true,"gridPos":{"h":1,"w":24,"x":0,"y":52},"id":38,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":3,"editable":true,"error":false,"fill":0,"grid":{},"gridPos":{"h":13,"w":24,"x":0,"y":11},"id":20,"isNew":true,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":false,"min":false,"rightSide":false,"show":true,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":true,"targets":[{"expr":"sum (rate (container_cpu_usage_seconds_total{id!=\"/\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (id)","hide":false,"interval":"10s","intervalFactor":1,"legendFormat":"{{ id }}","metric":"container_cpu","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"All processes CPU usage (2m avg)","tooltip":{"msResolution":true,"shared":true,"sort":2,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"none","label":"cores","logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"title":"All processes CPU 
usage","type":"row"},{"collapsed":true,"gridPos":{"h":1,"w":24,"x":0,"y":53},"id":39,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"editable":true,"error":false,"fill":0,"grid":{},"gridPos":{"h":7,"w":24,"x":0,"y":15},"id":25,"isNew":true,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":200,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":true,"targets":[{"expr":"sum (container_memory_working_set_bytes{image!=\"\",name=~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}) by (pod_name)","interval":"10s","intervalFactor":1,"legendFormat":"{{ pod_name }}","metric":"container_memory_usage:sort_desc","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Pods memory usage","tooltip":{"msResolution":false,"shared":true,"sort":2,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"title":"Pods memory 
usage","type":"row"},{"collapsed":true,"gridPos":{"h":1,"w":24,"x":0,"y":54},"id":40,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"editable":true,"error":false,"fill":0,"grid":{},"gridPos":{"h":7,"w":24,"x":0,"y":13},"id":26,"isNew":true,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":200,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":true,"targets":[{"expr":"sum (container_memory_working_set_bytes{systemd_service_name!=\"\",kubernetes_io_hostname=~\"^$Node$\"}) by (systemd_service_name)","interval":"10s","intervalFactor":1,"legendFormat":"{{ systemd_service_name }}","metric":"container_memory_usage:sort_desc","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"System services memory usage","tooltip":{"msResolution":false,"shared":true,"sort":2,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"title":"System services memory 
usage","type":"row"},{"collapsed":true,"gridPos":{"h":1,"w":24,"x":0,"y":55},"id":41,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"editable":true,"error":false,"fill":0,"grid":{},"gridPos":{"h":7,"w":24,"x":0,"y":14},"id":27,"isNew":true,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":200,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":true,"targets":[{"expr":"sum (container_memory_working_set_bytes{image!=\"\",name=~\"^k8s_.*\",container_name!=\"POD\",kubernetes_io_hostname=~\"^$Node$\"}) by (container_name, pod_name)","interval":"10s","intervalFactor":1,"legendFormat":"pod: {{ pod_name }} | {{ container_name }}","metric":"container_memory_usage:sort_desc","refId":"A","step":10},{"expr":"sum (container_memory_working_set_bytes{image!=\"\",name!~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}) by (kubernetes_io_hostname, name, image)","interval":"10s","intervalFactor":1,"legendFormat":"docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name }})","metric":"container_memory_usage:sort_desc","refId":"B","step":10},{"expr":"sum (container_memory_working_set_bytes{rkt_container_name!=\"\",kubernetes_io_hostname=~\"^$Node$\"}) by (kubernetes_io_hostname, rkt_container_name)","interval":"10s","intervalFactor":1,"legendFormat":"rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name }}","metric":"container_memory_usage:sort_desc","refId":"C","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Containers memory 
usage","tooltip":{"msResolution":false,"shared":true,"sort":2,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"title":"Containers memory usage","type":"row"},{"collapsed":true,"gridPos":{"h":1,"w":24,"x":0,"y":56},"id":42,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"editable":true,"error":false,"fill":0,"grid":{},"gridPos":{"h":13,"w":24,"x":0,"y":15},"id":28,"isNew":true,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":200,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":true,"targets":[{"expr":"sum (container_memory_working_set_bytes{id!=\"/\",kubernetes_io_hostname=~\"^$Node$\"}) by (id)","interval":"10s","intervalFactor":1,"legendFormat":"{{ id }}","metric":"container_memory_usage:sort_desc","refId":"A","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"All processes memory usage","tooltip":{"msResolution":false,"shared":true,"sort":2,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"title":"All processes memory 
usage","type":"row"},{"collapsed":true,"gridPos":{"h":1,"w":24,"x":0,"y":57},"id":43,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"editable":true,"error":false,"fill":1,"grid":{},"gridPos":{"h":7,"w":24,"x":0,"y":16},"id":16,"isNew":true,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":200,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum (rate (container_network_receive_bytes_total{image!=\"\",name=~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (pod_name)","interval":"10s","intervalFactor":1,"legendFormat":"-> {{ pod_name }}","metric":"network","refId":"A","step":10},{"expr":"- sum (rate (container_network_transmit_bytes_total{image!=\"\",name=~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (pod_name)","interval":"10s","intervalFactor":1,"legendFormat":"<- {{ pod_name }}","metric":"network","refId":"B","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Pods network I/O (2m avg)","tooltip":{"msResolution":false,"shared":true,"sort":2,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"title":"Pods network 
I/O","type":"row"},{"collapsed":true,"gridPos":{"h":1,"w":24,"x":0,"y":58},"id":44,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"editable":true,"error":false,"fill":1,"grid":{},"gridPos":{"h":7,"w":24,"x":0,"y":17},"id":30,"isNew":true,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":200,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum (rate (container_network_receive_bytes_total{image!=\"\",name=~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (container_name, pod_name)","hide":false,"interval":"10s","intervalFactor":1,"legendFormat":"-> pod: {{ pod_name }} | {{ container_name }}","metric":"network","refId":"B","step":10},{"expr":"- sum (rate (container_network_transmit_bytes_total{image!=\"\",name=~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (container_name, pod_name)","hide":false,"interval":"10s","intervalFactor":1,"legendFormat":"<- pod: {{ pod_name }} | {{ container_name }}","metric":"network","refId":"D","step":10},{"expr":"sum (rate (container_network_receive_bytes_total{image!=\"\",name!~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (kubernetes_io_hostname, name, image)","hide":false,"interval":"10s","intervalFactor":1,"legendFormat":"-> docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name }})","metric":"network","refId":"A","step":10},{"expr":"- sum (rate (container_network_transmit_bytes_total{image!=\"\",name!~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (kubernetes_io_hostname, name, image)","hide":false,"interval":"10s","intervalFactor":1,"legendFormat":"<- docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name 
}})","metric":"network","refId":"C","step":10},{"expr":"sum (rate (container_network_transmit_bytes_total{rkt_container_name!=\"\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (kubernetes_io_hostname, rkt_container_name)","hide":false,"interval":"10s","intervalFactor":1,"legendFormat":"-> rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name }}","metric":"network","refId":"E","step":10},{"expr":"- sum (rate (container_network_transmit_bytes_total{rkt_container_name!=\"\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (kubernetes_io_hostname, rkt_container_name)","hide":false,"interval":"10s","intervalFactor":1,"legendFormat":"<- rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name }}","metric":"network","refId":"F","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Containers network I/O (2m avg)","tooltip":{"msResolution":false,"shared":true,"sort":2,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"title":"Containers network I/O","type":"row"},{"collapsed":true,"gridPos":{"h":1,"w":24,"x":0,"y":59},"id":45,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"editable":true,"error":false,"fill":1,"grid":{},"gridPos":{"h":13,"w":24,"x":0,"y":18},"id":29,"isNew":true,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":200,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum (rate 
(container_network_receive_bytes_total{id!=\"/\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (id)","interval":"10s","intervalFactor":1,"legendFormat":"-> {{ id }}","metric":"network","refId":"A","step":10},{"expr":"- sum (rate (container_network_transmit_bytes_total{id!=\"/\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (id)","interval":"10s","intervalFactor":1,"legendFormat":"<- {{ id }}","metric":"network","refId":"B","step":10}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"All processes network I/O (2m avg)","tooltip":{"msResolution":false,"shared":true,"sort":2,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"title":"All processes network I/O","type":"row"}],"refresh":"1m","schemaVersion":16,"style":"dark","tags":[],"templating":{"list":[{"allValue":".*","current":{},"datasource":"${DS_PROMETHEUS}","hide":0,"includeAll":true,"label":null,"multi":false,"name":"Node","options":[],"query":"label_values(kubernetes_io_hostname)","refresh":1,"regex":"","skipUrlSync":false,"sort":0,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-30m","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone":"browser","title":"Cluster","uid":"icjpCppik","version":1},"inputs":[{"name":"DS_PROMETHEUS","pluginId":"prometheus","type":"datasource","value":"Rancher-Monitoring"}],"overwrite":false} \ No newline at end of file diff --git a/charts/rancher-monitoring/v0.0.1/charts/grafana/dashboards/c_etcd-dashboard.json b/charts/rancher-monitoring/v0.0.1/charts/grafana/dashboards/c_etcd-dashboard.json new file mode 100644 index 
0000000000000000000000000000000000000000..acf5272891480df233ca3eae65f27b8cc992a7fa --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/grafana/dashboards/c_etcd-dashboard.json @@ -0,0 +1 @@ +{"dashboard":{"__inputs":[{"name":"DS_PROMETHEUS","label":"Rancher-Monitoring","description":"","type":"datasource","pluginId":"prometheus","pluginName":"Prometheus"}],"__requires":[{"type":"grafana","id":"grafana","name":"Grafana","version":"5.3.0"},{"type":"panel","id":"graph","name":"Graph","version":"5.0.0"},{"type":"datasource","id":"prometheus","name":"Prometheus","version":"5.0.0"},{"type":"panel","id":"singlestat","name":"Singlestat","version":"5.0.0"}],"annotations":{"list":[{"builtIn":1,"datasource":"-- Grafana --","enable":true,"hide":true,"iconColor":"rgba(0, 211, 255, 1)","name":"Annotations & Alerts","type":"dashboard"}]},"description":"Etcd Dashboard for Prometheus metrics scraper","editable":true,"gnetId":3070,"graphTooltip":0,"id":null,"links":[],"panels":[{"cacheTimeout":null,"colorBackground":false,"colorValue":true,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"datasource":"${DS_PROMETHEUS}","format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":7,"w":8,"x":0,"y":0},"id":44,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"max(etcd_server_has_leader)","format":"time_series","intervalFactor":2,"refId":"A","step":600}],"thresholds":"0,1","title":"Etcd has a 
leader?","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"YES","value":"1"},{"op":"=","text":"NO","value":"0"}],"valueName":"avg"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"datasource":"${DS_PROMETHEUS}","format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":7,"w":8,"x":8,"y":0},"id":42,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"max(etcd_server_leader_changes_seen_total)","format":"time_series","intervalFactor":2,"refId":"A","step":600}],"thresholds":"","title":"The number of leader changes seen","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"avg"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"datasource":"${DS_PROMETHEUS}","format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":7,"w":8,"x":16,"y":0},"id":43,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 
0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"max(etcd_server_proposals_failed_total)","format":"time_series","intervalFactor":2,"refId":"A","step":600}],"thresholds":"","title":"The total number of failed proposals seen","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"avg"},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","editable":true,"error":false,"fill":0,"gridPos":{"h":7,"w":12,"x":0,"y":7},"id":23,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(grpc_server_started_total{grpc_type=\"unary\"}[5m]))","format":"time_series","intervalFactor":2,"legendFormat":"RPC Rate","metric":"grpc_server_started_total","refId":"A","step":60},{"expr":"sum(rate(grpc_server_handled_total{grpc_type=\"unary\",grpc_code!=\"OK\"}[5m]))","format":"time_series","intervalFactor":2,"legendFormat":"RPC Failed Rate","metric":"grpc_server_handled_total","refId":"B","step":60}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"RPC 
Rate","tooltip":{"msResolution":false,"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"ops","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","editable":true,"error":false,"fill":0,"gridPos":{"h":7,"w":12,"x":12,"y":7},"id":41,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(grpc_server_started_total{grpc_service=\"etcdserverpb.Watch\",grpc_type=\"bidi_stream\"}) - sum(grpc_server_handled_total{grpc_service=\"etcdserverpb.Watch\",grpc_type=\"bidi_stream\"})","format":"time_series","intervalFactor":2,"legendFormat":"Watch Streams","metric":"grpc_server_handled_total","refId":"A","step":60},{"expr":"sum(grpc_server_started_total{grpc_service=\"etcdserverpb.Lease\",grpc_type=\"bidi_stream\"}) - sum(grpc_server_handled_total{grpc_service=\"etcdserverpb.Lease\",grpc_type=\"bidi_stream\"})","format":"time_series","intervalFactor":2,"legendFormat":"Lease Streams","metric":"grpc_server_handled_total","refId":"B","step":60}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Active 
Streams","tooltip":{"msResolution":false,"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"","logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":null,"editable":true,"error":false,"fill":0,"grid":{},"gridPos":{"h":7,"w":8,"x":0,"y":14},"id":1,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"etcd_debugging_mvcc_db_total_size_in_bytes","format":"time_series","hide":false,"interval":"","intervalFactor":2,"legendFormat":"{{instance}} DB Size","metric":"","refId":"A","step":120}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"DB 
Size","tooltip":{"msResolution":false,"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","logBase":1,"max":null,"min":null,"show":true},{"format":"short","logBase":1,"max":null,"min":null,"show":false}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","editable":true,"error":false,"fill":0,"grid":{},"gridPos":{"h":7,"w":8,"x":8,"y":14},"id":3,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","percentage":false,"pointradius":1,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":true,"targets":[{"expr":"histogram_quantile(0.99, sum(rate(etcd_disk_wal_fsync_duration_seconds_bucket[5m])) by (instance, le))","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"{{instance}} WAL fsync","metric":"etcd_disk_wal_fsync_duration_seconds_bucket","refId":"A","step":120},{"expr":"histogram_quantile(0.99, sum(rate(etcd_disk_backend_commit_duration_seconds_bucket[5m])) by (instance, le))","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}} DB fsync","metric":"etcd_disk_backend_commit_duration_seconds_bucket","refId":"B","step":120}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Disk Sync 
Duration","tooltip":{"msResolution":false,"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"s","logBase":1,"max":null,"min":null,"show":true},{"format":"short","logBase":1,"max":null,"min":null,"show":false}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","editable":true,"error":false,"fill":0,"gridPos":{"h":7,"w":8,"x":16,"y":14},"id":29,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"process_resident_memory_bytes","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}} Resident Memory","metric":"process_resident_memory_bytes","refId":"A","step":120}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory","tooltip":{"msResolution":false,"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","editable":true,"error":false,"fill":5,"gridPos":{"h":7,"w":6,"x":0,"y":21},"id":22,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":true,"steppedLine":false,"targets
":[{"expr":"rate(etcd_network_client_grpc_received_bytes_total[5m])","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}} Client Traffic In","metric":"etcd_network_client_grpc_received_bytes_total","refId":"A","step":120}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Client Traffic In","tooltip":{"msResolution":false,"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","editable":true,"error":false,"fill":5,"gridPos":{"h":7,"w":6,"x":6,"y":21},"id":21,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":true,"steppedLine":false,"targets":[{"expr":"rate(etcd_network_client_grpc_sent_bytes_total[5m])","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}} Client Traffic Out","metric":"etcd_network_client_grpc_sent_bytes_total","refId":"A","step":120}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Client Traffic 
Out","tooltip":{"msResolution":false,"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","editable":true,"error":false,"fill":0,"gridPos":{"h":7,"w":6,"x":12,"y":21},"id":20,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(etcd_network_peer_received_bytes_total[5m])) by (instance)","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}} Peer Traffic In","metric":"etcd_network_peer_received_bytes_total","refId":"A","step":120}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Peer Traffic 
In","tooltip":{"msResolution":false,"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":null,"editable":true,"error":false,"fill":0,"grid":{},"gridPos":{"h":7,"w":6,"x":18,"y":21},"id":16,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(etcd_network_peer_sent_bytes_total[5m])) by (instance)","format":"time_series","hide":false,"interval":"","intervalFactor":2,"legendFormat":"{{instance}} Peer Traffic Out","metric":"etcd_network_peer_sent_bytes_total","refId":"A","step":120}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Peer Traffic 
Out","tooltip":{"msResolution":false,"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","logBase":1,"max":null,"min":null,"show":true},{"format":"short","logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","editable":true,"error":false,"fill":0,"gridPos":{"h":7,"w":12,"x":0,"y":28},"id":40,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(etcd_server_proposals_failed_total[5m]))","format":"time_series","intervalFactor":2,"legendFormat":"Proposal Failure Rate","metric":"etcd_server_proposals_failed_total","refId":"A","step":60},{"expr":"sum(etcd_server_proposals_pending)","format":"time_series","intervalFactor":2,"legendFormat":"Proposal Pending Total","metric":"etcd_server_proposals_pending","refId":"B","step":60},{"expr":"sum(rate(etcd_server_proposals_committed_total[5m]))","format":"time_series","intervalFactor":2,"legendFormat":"Proposal Commit Rate","metric":"etcd_server_proposals_committed_total","refId":"C","step":60},{"expr":"sum(rate(etcd_server_proposals_applied_total[5m]))","format":"time_series","intervalFactor":2,"legendFormat":"Proposal Apply Rate","refId":"D","step":60}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Raft 
Proposals","tooltip":{"msResolution":false,"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"","logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":0,"editable":true,"error":false,"fill":0,"gridPos":{"h":7,"w":12,"x":12,"y":28},"id":19,"legend":{"alignAsTable":false,"avg":false,"current":false,"max":false,"min":false,"rightSide":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"changes(etcd_server_leader_changes_seen_total[1d])","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}} Total Leader Elections Per Day","metric":"etcd_server_leader_changes_seen_total","refId":"A","step":60}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Total Leader Elections Per Day","tooltip":{"msResolution":false,"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","description":"proposals_committed_total records the total number of consensus proposals committed. This gauge should increase over time if the cluster is healthy. 
Several healthy members of an etcd cluster may have different total committed proposals at once. This discrepancy may be due to recovering from peers after starting, lagging behind the leader, or being the leader and therefore having the most commits. It is important to monitor this metric across all the members in the cluster; a consistently large lag between a single member and its leader indicates that member is slow or unhealthy.\n\nproposals_applied_total records the total number of consensus proposals applied. The etcd server applies every committed proposal asynchronously. The difference between proposals_committed_total and proposals_applied_total should usually be small (within a few thousands even under high load). If the difference between them continues to rise, it indicates that the etcd server is overloaded. This might happen when applying expensive queries like heavy range queries or large txn operations.","fill":1,"gridPos":{"h":7,"w":12,"x":0,"y":35},"id":2,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":false,"rightSide":false,"show":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(etcd_server_proposals_committed_total[5m]))","format":"time_series","intervalFactor":2,"legendFormat":"total number of consensus proposals committed","metric":"","refId":"A","step":60},{"expr":"sum(rate(etcd_server_proposals_applied_total[5m]))","format":"time_series","intervalFactor":2,"legendFormat":"total number of consensus proposals applied","metric":"","refId":"B","step":60}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"The total number of consensus proposals 
committed","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":"","logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","description":"indicates how many proposals are queued to commit. Rising pending proposals suggests there is a high client load or the member cannot commit proposals.","fill":1,"gridPos":{"h":7,"w":12,"x":12,"y":35},"id":5,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(etcd_server_proposals_pending)","format":"time_series","intervalFactor":2,"legendFormat":"Proposals pending","refId":"A","step":60}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Proposals 
pending","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":1,"gridPos":{"h":7,"w":24,"x":0,"y":42},"id":7,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(etcd_disk_wal_fsync_duration_seconds_sum[1m]))","format":"time_series","intervalFactor":2,"legendFormat":"The latency distributions of fsync called by wal","refId":"A","step":30},{"expr":"sum(rate(etcd_disk_backend_commit_duration_seconds_sum[1m]))","format":"time_series","intervalFactor":2,"legendFormat":"The latency distributions of commit called by backend","refId":"B","step":30}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Disk 
operations","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":1,"gridPos":{"h":7,"w":24,"x":0,"y":49},"id":8,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(etcd_network_client_grpc_received_bytes_total[1m]))","format":"time_series","intervalFactor":2,"legendFormat":"The total number of bytes received by grpc clients","refId":"A","step":30},{"expr":"sum(rate(etcd_network_client_grpc_sent_bytes_total[1m]))","format":"time_series","intervalFactor":2,"legendFormat":"The total number of bytes sent to grpc clients","refId":"B","step":30}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Network","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","description":"Abnormally high snapshot duration (snapshot_save_total_duration_seconds) indicates disk issues and might cause the cluster to be 
unstable.","fill":1,"gridPos":{"h":7,"w":24,"x":0,"y":56},"id":9,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(rate(etcd_debugging_snap_save_total_duration_seconds_sum[1m]))","format":"time_series","intervalFactor":2,"legendFormat":"The total latency distributions of save called by snapshot","refId":"A","step":30}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Snapshot duration","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}}],"schemaVersion":16,"style":"dark","tags":[],"templating":{"list":[]},"time":{"from":"now-6h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone":"browser","title":"Etcd","uid":"8naNKoYik","version":1},"inputs":[{"name":"DS_PROMETHEUS","pluginId":"prometheus","type":"datasource","value":"Rancher-Monitoring"}],"overwrite":false} \ No newline at end of file diff --git a/charts/rancher-monitoring/v0.0.1/charts/grafana/dashboards/c_kubernetes-components-dashboard.json b/charts/rancher-monitoring/v0.0.1/charts/grafana/dashboards/c_kubernetes-components-dashboard.json new file mode 100644 index 0000000000000000000000000000000000000000..eea37a0c2f615fba389635d84721d0cd2808f953 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/grafana/dashboards/c_kubernetes-components-dashboard.json @@ -0,0 +1 @@ 
+{"dashboard":{"__inputs":[{"name":"DS_PROMETHEUS","label":"Rancher-Monitoring","description":"","type":"datasource","pluginId":"prometheus","pluginName":"Prometheus"}],"__requires":[{"type":"grafana","id":"grafana","name":"Grafana","version":"5.3.0"},{"type":"panel","id":"graph","name":"Graph","version":"5.0.0"},{"type":"datasource","id":"prometheus","name":"Prometheus","version":"5.0.0"}],"annotations":{"list":[{"builtIn":1,"datasource":"-- Grafana --","enable":true,"hide":true,"iconColor":"rgba(0, 211, 255, 1)","name":"Annotations & Alerts","type":"dashboard"}]},"description":"-","editable":true,"gnetId":5508,"graphTooltip":0,"id":null,"links":[],"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","editable":true,"error":false,"fill":1,"grid":{"threshold1Color":"rgba(216, 200, 27, 0.27)","threshold2Color":"rgba(234, 112, 112, 0.22)"},"gridPos":{"h":7,"w":12,"x":0,"y":0},"id":6,"isNew":false,"legend":{"alignAsTable":false,"avg":false,"current":false,"hideEmpty":false,"hideZero":false,"max":false,"min":false,"rightSide":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum by(instance) (rate(apiserver_request_count{code!~\"2..\"}[5m]))","format":"time_series","intervalFactor":2,"legendFormat":"Error Rate","refId":"A","step":60},{"expr":"sum by(instance) (rate(apiserver_request_count[5m]))","format":"time_series","intervalFactor":2,"legendFormat":"Request Rate","refId":"B","step":60}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"API Server Request 
Rates","tooltip":{"msResolution":false,"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","logBase":1,"show":true},{"format":"short","logBase":1,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","editable":true,"error":false,"fill":1,"grid":{"threshold1Color":"rgba(216, 200, 27, 0.27)","threshold2Color":"rgba(234, 112, 112, 0.22)"},"gridPos":{"h":7,"w":12,"x":12,"y":0},"id":7,"isNew":false,"legend":{"alignAsTable":false,"avg":false,"current":false,"hideEmpty":false,"hideZero":false,"max":false,"min":false,"rightSide":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum by(verb) (rate(apiserver_latency_seconds:quantile[5m]) >= 0)","format":"time_series","intervalFactor":2,"legendFormat":"","refId":"A","step":30}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"API Server Request 
Latency","tooltip":{"msResolution":false,"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","logBase":1,"show":true},{"format":"short","logBase":1,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":1,"gridPos":{"h":9,"w":12,"x":0,"y":7},"id":11,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"rate(nginx_requests_total[1m])","format":"time_series","intervalFactor":1,"legendFormat":"Request Rate","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Nginx Ingress Request Rate","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":1,"gridPos":{"h":9,"w":12,"x":12,"y":7},"id":9,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"service_depth","format":"time_series","intervalFactor":1,"legendFormat":"Service 
Depth","refId":"A"},{"expr":"volumes_depth","format":"time_series","intervalFactor":1,"legendFormat":"Volumes Depth","refId":"B"},{"expr":"replicationmanager_depth","format":"time_series","intervalFactor":1,"legendFormat":"Replication Manager Depth","refId":"C"},{"expr":"statefulset_depth","format":"time_series","intervalFactor":1,"legendFormat":"StatefulSet Depth","refId":"D"},{"expr":"serviceaccount_depth","format":"time_series","intervalFactor":1,"legendFormat":"Service Account Depth","refId":"E"},{"expr":"endpoint_depth","format":"time_series","intervalFactor":1,"legendFormat":"Endpoint Depth","refId":"F"},{"expr":"deployment_depth","format":"time_series","intervalFactor":1,"legendFormat":"Deployment Depth","refId":"G"},{"expr":"daemonset_depth","format":"time_series","intervalFactor":1,"legendFormat":"DaemonSet Depth","refId":"H"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Controller Manager Queue Depth","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}}],"refresh":false,"schemaVersion":16,"style":"dark","tags":[],"templating":{"list":[]},"time":{"from":"now-6h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone":"browser","title":"Kubernetes Components","uid":"Ld4acTYmz","version":1},"inputs":[{"name":"DS_PROMETHEUS","pluginId":"prometheus","type":"datasource","value":"Rancher-Monitoring"}],"overwrite":false} \ No newline at end of file diff --git a/charts/rancher-monitoring/v0.0.1/charts/grafana/dashboards/c_kubernetes-resource-requests-dashboard.json 
b/charts/rancher-monitoring/v0.0.1/charts/grafana/dashboards/c_kubernetes-resource-requests-dashboard.json new file mode 100644 index 0000000000000000000000000000000000000000..d01e659ffe622654c7e57c0f9ac8522d2ca35b99 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/grafana/dashboards/c_kubernetes-resource-requests-dashboard.json @@ -0,0 +1 @@ +{"dashboard":{"__inputs":[{"name":"DS_PROMETHEUS","label":"Rancher-Monitoring","description":"","type":"datasource","pluginId":"prometheus","pluginName":"Prometheus"}],"__requires":[{"type":"grafana","id":"grafana","name":"Grafana","version":"5.3.0"},{"type":"panel","id":"graph","name":"Graph","version":"5.0.0"},{"type":"datasource","id":"prometheus","name":"Prometheus","version":"5.0.0"},{"type":"panel","id":"singlestat","name":"Singlestat","version":"5.0.0"}],"annotations":{"list":[{"builtIn":1,"datasource":"-- Grafana --","enable":true,"hide":true,"iconColor":"rgba(0, 211, 255, 1)","name":"Annotations & Alerts","type":"dashboard"}]},"editable":true,"gnetId":null,"graphTooltip":0,"id":null,"links":[],"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","description":"This represents the total [CPU resource requests](https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/#meaning-of-cpu) in the cluster.\nFor comparison the total [allocatable CPU cores](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md) is also shown.","editable":true,"error":false,"fill":1,"grid":{"threshold1Color":"rgba(216, 200, 27, 0.27)","threshold2Color":"rgba(234, 112, 112, 
0.22)"},"gridPos":{"h":8,"w":18,"x":0,"y":0},"id":1,"isNew":false,"legend":{"alignAsTable":false,"avg":false,"current":false,"hideEmpty":false,"hideZero":false,"max":false,"min":false,"rightSide":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"min(sum(kube_node_status_allocatable_cpu_cores) by (instance))","hide":false,"intervalFactor":2,"legendFormat":"Allocatable CPU Cores","refId":"A","step":20},{"expr":"max(sum(kube_pod_container_resource_requests_cpu_cores) by (instance))","hide":false,"intervalFactor":2,"legendFormat":"Requested CPU Cores","refId":"B","step":20}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU Cores","tooltip":{"msResolution":false,"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"CPU Cores","logBase":1,"show":true},{"format":"short","logBase":1,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(50, 172, 45, 0.97)","rgba(237, 129, 40, 0.89)","rgba(245, 54, 54, 0.9)"],"datasource":"${DS_PROMETHEUS}","editable":true,"format":"percent","gauge":{"maxValue":100,"minValue":0,"show":true,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":8,"w":6,"x":18,"y":0},"hideTimeOverride":false,"id":2,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 
193)","show":true},"tableColumn":"","targets":[{"expr":"max(sum(kube_pod_container_resource_requests_cpu_cores) by (instance)) / min(sum(kube_node_status_allocatable_cpu_cores) by (instance)) * 100","intervalFactor":2,"legendFormat":"","refId":"A","step":240}],"thresholds":"80, 90","title":"CPU Cores","transparent":false,"type":"singlestat","valueFontSize":"110%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"avg"},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","description":"This represents the total [memory resource requests](https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/#meaning-of-memory) in the cluster.\nFor comparison the total [allocatable memory](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md) is also shown.","editable":true,"error":false,"fill":1,"grid":{"threshold1Color":"rgba(216, 200, 27, 0.27)","threshold2Color":"rgba(234, 112, 112, 0.22)"},"gridPos":{"h":8,"w":18,"x":0,"y":8},"id":3,"isNew":false,"legend":{"alignAsTable":false,"avg":false,"current":false,"hideEmpty":false,"hideZero":false,"max":false,"min":false,"rightSide":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"min(sum(kube_node_status_allocatable_memory_bytes) by (instance))","hide":false,"intervalFactor":2,"legendFormat":"Allocatable Memory","refId":"A","step":20},{"expr":"max(sum(kube_pod_container_resource_requests_memory_bytes) by (instance))","hide":false,"intervalFactor":2,"legendFormat":"Requested 
Memory","refId":"B","step":20}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory","tooltip":{"msResolution":false,"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":"Memory","logBase":1,"show":true},{"format":"short","logBase":1,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(50, 172, 45, 0.97)","rgba(237, 129, 40, 0.89)","rgba(245, 54, 54, 0.9)"],"datasource":"${DS_PROMETHEUS}","editable":true,"format":"percent","gauge":{"maxValue":100,"minValue":0,"show":true,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":8,"w":6,"x":18,"y":8},"hideTimeOverride":false,"id":4,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":true},"tableColumn":"","targets":[{"expr":"max(sum(kube_pod_container_resource_requests_memory_bytes) by (instance)) / min(sum(kube_node_status_allocatable_memory_bytes) by (instance)) * 100","intervalFactor":2,"legendFormat":"","refId":"A","step":240}],"thresholds":"80, 90","title":"Memory","transparent":false,"type":"singlestat","valueFontSize":"110%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"avg"}],"refresh":false,"schemaVersion":16,"style":"dark","tags":[],"templating":{"list":[]},"time":{"from":"now-3h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone":"browser","title":"Kubernetes Resource 
Requests","uid":"0MdTILxik","version":1},"inputs":[{"name":"DS_PROMETHEUS","pluginId":"prometheus","type":"datasource","value":"Rancher-Monitoring"}],"overwrite":false} \ No newline at end of file diff --git a/charts/rancher-monitoring/v0.0.1/charts/grafana/dashboards/c_nodes-dashboard.json b/charts/rancher-monitoring/v0.0.1/charts/grafana/dashboards/c_nodes-dashboard.json new file mode 100644 index 0000000000000000000000000000000000000000..97468c42db278641ab589d71aa8b9af3a59f8781 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/grafana/dashboards/c_nodes-dashboard.json @@ -0,0 +1 @@ +{"dashboard":{"__inputs":[{"name":"DS_PROMETHEUS","label":"Rancher-Monitoring","description":"","type":"datasource","pluginId":"prometheus","pluginName":"Prometheus"}],"__requires":[{"type":"grafana","id":"grafana","name":"Grafana","version":"5.3.0"},{"type":"panel","id":"graph","name":"Graph","version":"5.0.0"},{"type":"datasource","id":"prometheus","name":"Prometheus","version":"5.0.0"},{"type":"panel","id":"singlestat","name":"Singlestat","version":"5.0.0"}],"annotations":{"list":[{"builtIn":1,"datasource":"-- Grafana --","enable":true,"hide":true,"iconColor":"rgba(0, 211, 255, 1)","name":"Annotations & Alerts","type":"dashboard"}]},"description":"Nearly all default values exported by Prometheus node exporter graphed.","editable":true,"gnetId":1860,"graphTooltip":0,"id":null,"iteration":1543396145848,"links":[],"panels":[{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":0},"id":261,"panels":[],"repeat":null,"title":"Basic CPU / Mem / Disk Gauge","type":"row"},{"cacheTimeout":null,"colorBackground":false,"colorValue":true,"colors":["rgba(50, 172, 45, 0.97)","rgba(237, 129, 40, 0.89)","rgba(245, 54, 54, 0.9)"],"datasource":"${DS_PROMETHEUS}","decimals":null,"description":"Busy state of all CPU cores 
together","format":"percent","gauge":{"maxValue":100,"minValue":0,"show":true,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":4,"w":4,"x":0,"y":1},"id":20,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"minSpan":2,"nullPointMode":"null","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":true},"tableColumn":"","targets":[{"expr":"(((count(count(node_cpu_seconds_total{instance=~\"$node:$port\",job=~\"$job\"}) by (cpu))) - avg(sum by (mode)(irate(node_cpu_seconds_total{mode='idle',instance=~\"$node:$port\",job=~\"$job\"}[5m])))) * 100) / count(count(node_cpu_seconds_total{instance=~\"$node:$port\",job=~\"$job\"}) by (cpu))","hide":false,"intervalFactor":1,"legendFormat":"","refId":"A","step":900}],"thresholds":"85,95","title":"CPU Busy","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":true,"colors":["rgba(50, 172, 45, 0.97)","rgba(237, 129, 40, 0.89)","rgba(245, 54, 54, 0.9)"],"datasource":"${DS_PROMETHEUS}","decimals":0,"description":"Non available RAM memory","format":"percent","gauge":{"maxValue":100,"minValue":0,"show":true,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":4,"w":4,"x":4,"y":1},"hideTimeOverride":false,"id":16,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"minSpan":4,"nullPointMode":"null","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 
0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":true},"tableColumn":"","targets":[{"expr":"((node_memory_MemTotal_bytes{instance=~\"$node:$port\",job=~\"$job\"} - node_memory_MemFree_bytes{instance=~\"$node:$port\",job=~\"$job\"}) / (node_memory_MemTotal_bytes{instance=~\"$node:$port\",job=~\"$job\"} )) * 100","format":"time_series","hide":true,"intervalFactor":1,"refId":"A","step":900},{"expr":"100 - ((node_memory_MemAvailable_bytes{instance=~\"$node:$port\",job=~\"$job\"} * 100) / node_memory_MemTotal_bytes{instance=~\"$node:$port\",job=~\"$job\"})","format":"time_series","hide":false,"intervalFactor":1,"refId":"B","step":900}],"thresholds":"80,90","title":"Used RAM Memory","type":"singlestat","valueFontSize":"80%","valueMaps":[],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":true,"colors":["rgba(50, 172, 45, 0.97)","rgba(237, 129, 40, 0.89)","rgba(245, 54, 54, 0.9)"],"datasource":"${DS_PROMETHEUS}","decimals":null,"description":"Used Swap","format":"percent","gauge":{"maxValue":100,"minValue":0,"show":true,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":4,"w":4,"x":8,"y":1},"id":21,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"minSpan":4,"nullPointMode":"null","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":true},"tableColumn":"","targets":[{"expr":"((node_memory_SwapTotal_bytes{instance=~\"$node:$port\",job=~\"$job\"} - node_memory_SwapFree_bytes{instance=~\"$node:$port\",job=~\"$job\"}) / (node_memory_SwapTotal_bytes{instance=~\"$node:$port\",job=~\"$job\"} )) * 100","intervalFactor":1,"refId":"A","step":900}],"thresholds":"10,25","title":"Used 
SWAP","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":true,"colors":["rgba(50, 172, 45, 0.97)","rgba(237, 129, 40, 0.89)","rgba(245, 54, 54, 0.9)"],"datasource":"${DS_PROMETHEUS}","decimals":null,"description":"Used Root FS","format":"percent","gauge":{"maxValue":100,"minValue":0,"show":true,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":4,"w":4,"x":12,"y":1},"id":154,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"minSpan":4,"nullPointMode":"null","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":true},"tableColumn":"","targets":[{"expr":"100 - ((node_filesystem_avail_bytes{instance=~\"$node:$port\",job=~\"$job\",mountpoint=\"/\",fstype!=\"rootfs\"} * 100) / node_filesystem_size_bytes{instance=~\"$node:$port\",job=~\"$job\",mountpoint=\"/\",fstype!=\"rootfs\"})","format":"time_series","intervalFactor":1,"refId":"A","step":900}],"thresholds":"80,90","title":"Used Root FS","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":true,"colors":["rgba(50, 172, 45, 0.97)","rgba(237, 129, 40, 0.89)","rgba(245, 54, 54, 0.9)"],"datasource":"${DS_PROMETHEUS}","decimals":null,"description":"Busy state of all CPU cores together (1 min average)","format":"percent","gauge":{"maxValue":100,"minValue":0,"show":true,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":4,"w":4,"x":16,"y":1},"id":19,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to 
text","value":2}],"maxDataPoints":100,"minSpan":2,"nullPointMode":"null","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":true},"tableColumn":"","targets":[{"expr":"avg(node_load1{instance=~\"$node:$port\",job=~\"$job\"}) / count(count(node_cpu_seconds_total{instance=~\"$node:$port\",job=~\"$job\"}) by (cpu)) * 100","hide":false,"intervalFactor":1,"refId":"A","step":900}],"thresholds":"85, 95","title":"CPU System Load (1m avg)","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":true,"colors":["rgba(50, 172, 45, 0.97)","rgba(237, 129, 40, 0.89)","rgba(245, 54, 54, 0.9)"],"datasource":"${DS_PROMETHEUS}","decimals":null,"description":"Busy state of all CPU cores together (5 min average)","format":"percent","gauge":{"maxValue":100,"minValue":0,"show":true,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":4,"w":4,"x":20,"y":1},"id":155,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"minSpan":2,"nullPointMode":"null","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":true},"tableColumn":"","targets":[{"expr":"avg(node_load5{instance=~\"$node:$port\",job=~\"$job\"}) / count(count(node_cpu_seconds_total{instance=~\"$node:$port\",job=~\"$job\"}) by (cpu)) * 100","format":"time_series","hide":false,"intervalFactor":1,"refId":"A","step":900}],"thresholds":"85, 95","title":"CPU System Load (5m 
avg)","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":5},"id":262,"panels":[],"repeat":null,"title":"Basic CPU / Mem / Disk Info","type":"row"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"datasource":"${DS_PROMETHEUS}","description":"Total number of CPU cores","format":"short","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":3,"w":4,"x":0,"y":6},"id":14,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"minSpan":4,"nullPointMode":"null","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"count(count(node_cpu_seconds_total{instance=~\"$node:$port\",job=~\"$job\"}) by (cpu))","intervalFactor":1,"refId":"A","step":900}],"thresholds":"","title":"CPU Cores","type":"singlestat","valueFontSize":"50%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"datasource":"${DS_PROMETHEUS}","decimals":2,"description":"Total RAM","format":"bytes","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":3,"w":4,"x":4,"y":6},"id":75,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to 
text","value":2}],"maxDataPoints":100,"minSpan":4,"nullPointMode":"null","nullText":null,"postfix":"","postfixFontSize":"70%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"node_memory_MemTotal_bytes{instance=~\"$node:$port\",job=~\"$job\"}","intervalFactor":1,"refId":"A","step":900}],"thresholds":"","title":"Total RAM","type":"singlestat","valueFontSize":"50%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"datasource":"${DS_PROMETHEUS}","decimals":2,"description":"Total SWAP","format":"bytes","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":3,"w":4,"x":8,"y":6},"id":18,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"minSpan":4,"nullPointMode":"null","nullText":null,"postfix":"","postfixFontSize":"70%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"node_memory_SwapTotal_bytes{instance=~\"$node:$port\",job=~\"$job\"}","intervalFactor":1,"refId":"A","step":900}],"thresholds":"","title":"Total SWAP","type":"singlestat","valueFontSize":"50%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(50, 172, 45, 0.97)","rgba(237, 129, 40, 0.89)","rgba(245, 54, 54, 0.9)"],"datasource":"${DS_PROMETHEUS}","decimals":null,"description":"Total 
RootFS","format":"bytes","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":3,"w":4,"x":12,"y":6},"id":23,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"minSpan":4,"nullPointMode":"null","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"node_filesystem_size_bytes{instance=~\"$node:$port\",job=~\"$job\",mountpoint=\"/\",fstype!=\"rootfs\"}","format":"time_series","hide":false,"intervalFactor":1,"refId":"A","step":900}],"thresholds":"70,90","title":"Total RootFS","type":"singlestat","valueFontSize":"50%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"datasource":"${DS_PROMETHEUS}","decimals":2,"description":"System Load (1m avg)","format":"short","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":3,"w":4,"x":16,"y":6},"id":17,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"minSpan":2,"nullPointMode":"null","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 
193)","show":false},"tableColumn":"","targets":[{"expr":"node_load1{instance=~\"$node:$port\",job=~\"$job\"}","hide":false,"intervalFactor":1,"refId":"A","step":900}],"thresholds":"","title":"System Load (1m avg)","type":"singlestat","valueFontSize":"50%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"datasource":"${DS_PROMETHEUS}","decimals":1,"description":"System uptime","format":"s","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":3,"w":4,"x":20,"y":6},"hideTimeOverride":true,"id":15,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"null","nullText":null,"postfix":"s","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"node_time_seconds{instance=~\"$node:$port\",job=~\"$job\"} - node_boot_time_seconds{instance=~\"$node:$port\",job=~\"$job\"}","intervalFactor":2,"refId":"A","step":1800}],"thresholds":"","title":"Uptime","transparent":false,"type":"singlestat","valueFontSize":"50%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":9},"id":263,"panels":[],"repeat":null,"title":"Basic CPU / Mem Graph","type":"row"},{"aliasColors":{"Busy":"#EAB839","Busy Iowait":"#890F02","Busy other":"#1F78C1","Idle":"#052B51","Idle - Waiting for something to 
happen":"#052B51","guest":"#9AC48A","idle":"#052B51","iowait":"#EAB839","irq":"#BF1B00","nice":"#C15C17","softirq":"#E24D42","steal":"#FCE2DE","system":"#508642","user":"#5195CE"},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"description":"Basic CPU info","fill":4,"gridPos":{"h":7,"w":12,"x":0,"y":10},"id":77,"legend":{"alignAsTable":false,"avg":false,"current":false,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":250,"sort":null,"sortDesc":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"minSpan":4,"nullPointMode":"null","percentage":true,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"Busy Iowait","color":"#890F02"},{"alias":"Idle","color":"#7EB26D"},{"alias":"Busy System","color":"#EAB839"},{"alias":"Busy User","color":"#0A437C"},{"alias":"Busy Other","color":"#6D1F62"}],"spaceLength":10,"stack":true,"steppedLine":false,"targets":[{"expr":"sum by (instance)(rate(node_cpu_seconds_total{mode=\"system\",instance=~\"$node:$port\",job=~\"$job\"}[5m])) * 100","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"Busy System","refId":"B","step":240},{"expr":"sum by (instance)(rate(node_cpu_seconds_total{mode='user',instance=~\"$node:$port\",job=~\"$job\"}[5m])) * 100","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"Busy User","refId":"D","step":240},{"expr":"sum by (instance)(rate(node_cpu_seconds_total{mode='iowait',instance=~\"$node:$port\",job=~\"$job\"}[5m])) * 100","format":"time_series","intervalFactor":2,"legendFormat":"Busy Iowait","refId":"E","step":240},{"expr":"sum by (instance)(rate(node_cpu_seconds_total{mode=~\".*irq\",instance=~\"$node:$port\",job=~\"$job\"}[5m])) * 100","format":"time_series","intervalFactor":2,"legendFormat":"Busy IRQs","refId":"F","step":240},{"expr":"sum 
(rate(node_cpu_seconds_total{mode!='idle',mode!='user',mode!='system',mode!='iowait',mode!='irq',mode!='softirq',instance=~\"$node:$port\",job=~\"$job\"}[5m])) * 100","format":"time_series","intervalFactor":2,"legendFormat":"Busy Other","refId":"A","step":240},{"expr":"sum by (mode)(rate(node_cpu_seconds_total{mode='idle',instance=~\"$node:$port\",job=~\"$job\"}[5m])) * 100","format":"time_series","intervalFactor":2,"legendFormat":"Idle","refId":"C","step":240}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU Basic","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"transparent":false,"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"","logBase":1,"max":"100","min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{"Apps":"#629E51","Buffers":"#614D93","Cache":"#6D1F62","Cached":"#511749","Committed":"#508642","Free":"#0A437C","Harware Corrupted - Amount of RAM that the kernel identified as corrupted / not working":"#CFFAFF","Inactive":"#584477","PageTables":"#0A50A1","Page_Tables":"#0A50A1","RAM_Free":"#E0F9D7","SWAP Used":"#BF1B00","Slab":"#806EB7","Slab_Cache":"#E0752D","Swap":"#BF1B00","Swap Used":"#BF1B00","Swap_Cache":"#C15C17","Swap_Free":"#2F575E","Unused":"#EAB839"},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"description":"Basic memory usage","fill":4,"gridPos":{"h":7,"w":12,"x":12,"y":10},"id":78,"legend":{"alignAsTable":false,"avg":false,"current":false,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":350,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"minSpan":4,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"RAM Total","color":"#E0F9D7","fill":0,"stack":false},{"alias":"RAM Cache + 
Buffer","color":"#052B51"},{"alias":"RAM Free","color":"#7EB26D"},{"alias":"Avaliable","color":"#DEDAF7","fill":0,"stack":false}],"spaceLength":10,"stack":true,"steppedLine":false,"targets":[{"expr":"node_memory_MemTotal_bytes{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"RAM Total","refId":"A","step":240},{"expr":"node_memory_MemTotal_bytes{instance=~\"$node:$port\",job=~\"$job\"} - node_memory_MemFree_bytes{instance=~\"$node:$port\",job=~\"$job\"} - (node_memory_Cached_bytes{instance=~\"$node:$port\",job=~\"$job\"} + node_memory_Buffers_bytes{instance=~\"$node:$port\",job=~\"$job\"})","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"RAM Used","refId":"D","step":240},{"expr":"node_memory_Cached_bytes{instance=~\"$node:$port\",job=~\"$job\"} + node_memory_Buffers_bytes{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"RAM Cache + Buffer","refId":"B","step":240},{"expr":"node_memory_MemFree_bytes{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"RAM Free","refId":"F","step":240},{"expr":"(node_memory_SwapTotal_bytes{instance=~\"$node:$port\",job=~\"$job\"} - node_memory_SwapFree_bytes{instance=~\"$node:$port\",job=~\"$job\"})","format":"time_series","intervalFactor":2,"legendFormat":"SWAP Used","refId":"G","step":240}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Basic","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":"","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}],"yaxis":{"align":false,"alignLevel":null}},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":17},"id":264,"panels":[],"repeat":null,"title":"Basic Net / Disk 
Info","type":"row"},{"aliasColors":{"Recv_bytes_eth2":"#7EB26D","Recv_bytes_lo":"#0A50A1","Recv_drop_eth2":"#6ED0E0","Recv_drop_lo":"#E0F9D7","Recv_errs_eth2":"#BF1B00","Recv_errs_lo":"#CCA300","Trans_bytes_eth2":"#7EB26D","Trans_bytes_lo":"#0A50A1","Trans_drop_eth2":"#6ED0E0","Trans_drop_lo":"#E0F9D7","Trans_errs_eth2":"#BF1B00","Trans_errs_lo":"#CCA300","recv_bytes_lo":"#0A50A1","recv_drop_eth0":"#99440A","recv_drop_lo":"#967302","recv_errs_eth0":"#BF1B00","recv_errs_lo":"#890F02","trans_bytes_eth0":"#7EB26D","trans_bytes_lo":"#0A50A1","trans_drop_eth0":"#99440A","trans_drop_lo":"#967302","trans_errs_eth0":"#BF1B00","trans_errs_lo":"#890F02"},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","description":"Basic network info per interface","fill":4,"gridPos":{"h":7,"w":12,"x":0,"y":18},"id":74,"legend":{"alignAsTable":false,"avg":false,"current":false,"hideEmpty":false,"hideZero":false,"max":false,"min":false,"rightSide":false,"show":true,"sort":"current","sortDesc":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"/.*trans.*/","transform":"negative-Y"},{"alias":"/.*lo.*/","color":"#7EB26D"},{"alias":"/.*eth0.*/","color":"#EAB839"},{"alias":"/.*eth1.*/","color":"#6ED0E0"},{"alias":"/.*eth2.*/","color":"#EF843C"},{"alias":"/.*eth3.*/","color":"#E24D42"},{"alias":"/.*eth4.*/","color":"#1F78C1"},{"alias":"/.*eth5.*/","color":"#BA43A9"}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"rate(node_network_receive_bytes_total{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"recv {{device}}","refId":"A","step":240},{"expr":"rate(node_network_transmit_bytes_total{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"trans {{device}} 
","refId":"B","step":240}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Network Traffic Basic","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"transparent":false,"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"pps","label":"","logBase":1,"max":null,"min":null,"show":false}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":3,"description":"Disk space used of all filesystems mounted","fill":4,"gridPos":{"h":7,"w":12,"x":12,"y":18},"height":"","id":152,"legend":{"alignAsTable":false,"avg":false,"current":false,"max":false,"min":false,"rightSide":false,"show":true,"sort":"current","sortDesc":false,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"minSpan":4,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"100 - ((node_filesystem_avail_bytes{instance=~\"$node:$port\",job=~\"$job\",device!~'rootfs'} * 100) / node_filesystem_size_bytes{instance=~\"$node:$port\",job=~\"$job\",device!~'rootfs'})","format":"time_series","intervalFactor":2,"legendFormat":"{{mountpoint}}","refId":"A","step":240}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Disk Space Used Basic","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"percent","label":null,"logBase":1,"max":"100","min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"collapsed":true,"gridPos":{"h":1,"w":24,"x":0,"y":25},"id":265,"panels":[{"aliasColors":{"Idle - Waiting for something to 
happen":"#052B51","guest":"#9AC48A","idle":"#052B51","iowait":"#EAB839","irq":"#BF1B00","nice":"#C15C17","softirq":"#E24D42","steal":"#FCE2DE","system":"#508642","user":"#5195CE"},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"description":"","fill":4,"gridPos":{"h":12,"w":12,"x":0,"y":26},"id":3,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":250,"sort":null,"sortDesc":null,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":4,"nullPointMode":"null","percentage":true,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"stack":true,"steppedLine":false,"targets":[{"expr":"sum by (mode)(irate(node_cpu_seconds_total{mode=\"system\",instance=~\"$node:$port\",job=~\"$job\"}[5m])) * 100","format":"time_series","interval":"10s","intervalFactor":2,"legendFormat":"System - Processes executing in kernel mode","refId":"A","step":20},{"expr":"sum by (mode)(irate(node_cpu_seconds_total{mode='user',instance=~\"$node:$port\",job=~\"$job\"}[5m])) * 100","format":"time_series","intervalFactor":2,"legendFormat":"User - Normal processes executing in user mode","refId":"B","step":240},{"expr":"sum by (mode)(irate(node_cpu_seconds_total{mode='nice',instance=~\"$node:$port\",job=~\"$job\"}[5m])) * 100","format":"time_series","intervalFactor":2,"legendFormat":"Nice - Niced processes executing in user mode","refId":"C","step":240},{"expr":"sum by (mode)(irate(node_cpu_seconds_total{mode='idle',instance=~\"$node:$port\",job=~\"$job\"}[5m])) * 100","format":"time_series","intervalFactor":2,"legendFormat":"Idle - Waiting for something to happen","refId":"F","step":240},{"expr":"sum by (mode)(irate(node_cpu_seconds_total{mode='iowait',instance=~\"$node:$port\",job=~\"$job\"}[5m])) * 100","format":"time_series","intervalFactor":2,"legendFormat":"Iowait - Waiting for I/O to 
complete","refId":"D","step":240},{"expr":"sum by (mode)(irate(node_cpu_seconds_total{mode='irq',instance=~\"$node:$port\",job=~\"$job\"}[5m])) * 100","format":"time_series","intervalFactor":2,"legendFormat":"Irq - Servicing interrupts","refId":"G","step":240},{"expr":"sum by (mode)(irate(node_cpu_seconds_total{mode='softirq',instance=~\"$node:$port\",job=~\"$job\"}[5m])) * 100","format":"time_series","intervalFactor":2,"legendFormat":"Softirq - Servicing softirqs","refId":"H","step":240},{"expr":"sum by (mode)(irate(node_cpu_seconds_total{mode='steal',instance=~\"$node:$port\",job=~\"$job\"}[5m])) * 100","format":"time_series","intervalFactor":2,"legendFormat":"Steal - Time spent in other operating systems when running in a virtualized environment","refId":"E","step":240},{"expr":"sum by (mode)(irate(node_cpu_seconds_total{mode='guest',instance=~\"$node:$port\",job=~\"$job\"}[5m])) * 100","format":"time_series","intervalFactor":2,"legendFormat":"Guest - Time spent running a virtual CPU for a guest operating system","refId":"I","step":240}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"transparent":false,"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Percentage","logBase":1,"max":"100","min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{"Apps":"#629E51","Buffers":"#614D93","Cache":"#6D1F62","Cached":"#511749","Committed":"#508642","Free":"#0A437C","Harware Corrupted - Amount of RAM that the kernel identified as corrupted / not working":"#CFFAFF","Inactive":"#584477","PageTables":"#0A50A1","Page_Tables":"#0A50A1","RAM_Free":"#E0F9D7","Slab":"#806EB7","Slab_Cache":"#E0752D","Swap":"#BF1B00","Swap - Swap memory usage":"#BF1B00","Swap_Cache":"#C15C17","Swap_Free":"#2F575E","Unused":"#EAB839","Unused - Free memory 
unasigned":"#052B51"},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"description":"","fill":4,"gridPos":{"h":12,"w":12,"x":12,"y":26},"id":24,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":350,"sort":null,"sortDesc":null,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":4,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"/.*Harware Corrupted - *./","stack":false}],"spaceLength":10,"stack":true,"steppedLine":false,"targets":[{"expr":"node_memory_MemTotal_bytes{instance=~\"$node:$port\",job=~\"$job\"} - node_memory_MemFree_bytes{instance=~\"$node:$port\",job=~\"$job\"} - node_memory_Buffers_bytes{instance=~\"$node:$port\",job=~\"$job\"} - node_memory_Cached_bytes{instance=~\"$node:$port\",job=~\"$job\"} - node_memory_Slab_bytes{instance=~\"$node:$port\",job=~\"$job\"} - node_memory_PageTables_bytes{instance=~\"$node:$port\",job=~\"$job\"} - node_memory_SwapCached_bytes{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"Apps - Memory used by user-space applications","refId":"Q","step":240},{"expr":"node_memory_PageTables_bytes{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"PageTables - Memory used to map between virtual and physical memory addresses","refId":"G","step":240},{"expr":"node_memory_SwapCached_bytes{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"SwapCache - Memory that keeps track of pages that have been fetched from swap but not yet been modified","refId":"F","step":240},{"expr":"node_memory_Slab_bytes{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"Slab - Memory used by the kernel to cache data 
structures for its own use (caches like inode, dentry, etc)","refId":"E","step":240},{"expr":"node_memory_Cached_bytes{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"Cache - Parked file data (file content) cache","refId":"C","step":240},{"expr":"node_memory_Buffers_bytes{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"Buffers - Block device (e.g. harddisk) cache","refId":"B","step":240},{"expr":"node_memory_MemFree_bytes{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"Unused - Free memory unasigned","refId":"D","step":240},{"expr":"(node_memory_SwapTotal_bytes{instance=~\"$node:$port\",job=~\"$job\"} - node_memory_SwapFree_bytes{instance=~\"$node:$port\",job=~\"$job\"})","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"Swap - Swap space used","refId":"I","step":240},{"expr":"node_memory_HardwareCorrupted_bytes{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"Harware Corrupted - Amount of RAM that the kernel identified as corrupted / not working","refId":"O","step":240}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory 
Stack","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":"Bytes","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{"receive_packets_eth0":"#7EB26D","receive_packets_lo":"#E24D42","transmit_packets_eth0":"#7EB26D","transmit_packets_lo":"#E24D42"},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":4,"gridPos":{"h":12,"w":12,"x":0,"y":38},"id":84,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"rightSide":false,"show":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"/.*Trans.*/","transform":"negative-Y"},{"alias":"/.*lo.*/","color":"#7EB26D"},{"alias":"/.*eth0.*/","color":"#EAB839"},{"alias":"/.*eth1.*/","color":"#6ED0E0"},{"alias":"/.*eth2.*/","color":"#EF843C"},{"alias":"/.*eth3.*/","color":"#E24D42"},{"alias":"/.*eth4.*/","color":"#1F78C1"},{"alias":"/.*eth5.*/","color":"#BA43A9"}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_network_receive_bytes_total{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"{{device}} - Receive","refId":"O","step":240},{"expr":"irate(node_network_transmit_bytes_total{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"{{device}} - Transmit","refId":"P","step":240}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Network Traffic","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":"Bytes out (-) / in 
(+)","logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":3,"description":"","fill":4,"gridPos":{"h":12,"w":12,"x":12,"y":38},"height":"","id":156,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"rightSide":false,"show":true,"sort":"current","sortDesc":false,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":4,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"node_filesystem_size_bytes{instance=~\"$node:$port\",job=~\"$job\",device!~'rootfs'} - node_filesystem_avail_bytes{instance=~\"$node:$port\",job=~\"$job\",device!~'rootfs'}","format":"time_series","intervalFactor":2,"legendFormat":"{{mountpoint}}","refId":"A","step":240}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Disk Space 
Used","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":"Bytes","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","description":"","fill":2,"gridPos":{"h":12,"w":12,"x":0,"y":50},"id":229,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideZero":true,"max":true,"min":true,"rightSide":false,"show":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":4,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"/.*Read.*/","transform":"negative-Y"},{"alias":"/.*sda_.*/","color":"#7EB26D"},{"alias":"/.*sdb_.*/","color":"#EAB839"},{"alias":"/.*sdc_.*/","color":"#6ED0E0"},{"alias":"/.*sdd_.*/","color":"#EF843C"},{"alias":"/.*sde_.*/","color":"#E24D42"},{"alias":"/.*sda1.*/","color":"#584477"},{"alias":"/.*sda2_.*/","color":"#BA43A9"},{"alias":"/.*sda3_.*/","color":"#F4D598"},{"alias":"/.*sdb1.*/","color":"#0A50A1"},{"alias":"/.*sdb2.*/","color":"#BF1B00"},{"alias":"/.*sdb3.*/","color":"#E0752D"},{"alias":"/.*sdc1.*/","color":"#962D82"},{"alias":"/.*sdc2.*/","color":"#614D93"},{"alias":"/.*sdc3.*/","color":"#9AC48A"},{"alias":"/.*sdd1.*/","color":"#65C5DB"},{"alias":"/.*sdd2.*/","color":"#F9934E"},{"alias":"/.*sdd3.*/","color":"#EA6460"},{"alias":"/.*sde1.*/","color":"#E0F9D7"},{"alias":"/.*sdd2.*/","color":"#FCEACA"},{"alias":"/.*sde3.*/","color":"#F9E2D2"}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_disk_reads_completed_total{instance=~\"$node:$port\",job=~\"$job\",device=~\"[a-z]*[a-z]\"}[5m])","intervalFactor":4,"legendFormat":"{{device}} - Reads 
completed","refId":"A","step":480},{"expr":"irate(node_disk_writes_completed_total{instance=~\"$node:$port\",job=~\"$job\",device=~\"[a-z]*[a-z]\"}[5m])","intervalFactor":2,"legendFormat":"{{device}} - Writes completed","refId":"B","step":240}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Disk IOps","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"iops","label":"IO read (-) / write (+)","logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{"io time":"#890F02"},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":3,"description":"","fill":4,"gridPos":{"h":12,"w":12,"x":12,"y":50},"id":42,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"rightSide":false,"show":true,"sort":null,"sortDesc":null,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":4,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"/.*read*./","transform":"negative-Y"},{"alias":"/.*sda.*/","color":"#7EB26D"},{"alias":"/.*sdb.*/","color":"#EAB839"},{"alias":"/.*sdc.*/","color":"#6ED0E0"},{"alias":"/.*sdd.*/","color":"#EF843C"},{"alias":"/.*sde.*/","color":"#E24D42"}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_disk_read_bytes_total{instance=~\"$node:$port\",job=~\"$job\",device=~\"[a-z]*[a-z]\"}[5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"{{device}} - Successfully read bytes","refId":"A","step":240},{"expr":"irate(node_disk_written_bytes_total{instance=~\"$node:$port\",job=~\"$job\",device=~\"[a-z]*[a-z]\"}[5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"{{device}} - Successfully written 
bytes","refId":"B","step":240}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"I/O Usage Read / Write","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":false,"values":[]},"yaxes":[{"format":"bytes","label":"Bytes read (-) / write (+)","logBase":1,"max":null,"min":null,"show":true},{"format":"ms","label":"","logBase":1,"max":null,"min":null,"show":true}]},{"aliasColors":{"io time":"#890F02"},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":3,"description":"","fill":4,"gridPos":{"h":12,"w":12,"x":0,"y":62},"id":127,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"rightSide":false,"show":true,"sort":null,"sortDesc":null,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":4,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_disk_io_time_seconds_total{instance=~\"$node:$port\",job=~\"$job\",device=~\"[a-z]*[a-z]\"} [5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"{{device}} - Milliseconds spent doing I/Os","refId":"C","step":240}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"I/O Usage Times","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":false,"values":[]},"yaxes":[{"format":"ms","label":"Milliseconds","logBase":1,"max":null,"min":"0","show":true},{"format":"ms","label":"","logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"title":"CPU Memory Net Disk","type":"row"},{"collapsed":true,"gridPos":{"h":1,"w":24,"x":0,"y":26},"id":266,"panels":[{"aliasColors":{"Apps":"#629E51","Buffers":"#614D93","Cache":"#6D1F62","Cached":"#511749","Committed":"#508642","Free":"#0A437C","Harware Corrupted - Amount of RAM that 
the kernel identified as corrupted / not working":"#CFFAFF","Inactive":"#584477","PageTables":"#0A50A1","Page_Tables":"#0A50A1","RAM_Free":"#E0F9D7","Slab":"#806EB7","Slab_Cache":"#E0752D","Swap":"#BF1B00","Swap_Cache":"#C15C17","Swap_Free":"#2F575E","Unused":"#EAB839"},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":63},"id":136,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":350,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":12,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":true,"steppedLine":false,"targets":[{"expr":"node_memory_Inactive_bytes{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"Inactive - Memory which has been less recently used. It is more eligible to be reclaimed for other purposes","refId":"K","step":4},{"expr":"node_memory_Active_bytes{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"Active - Memory that has been used more recently and usually not reclaimed unless absolutely necessary","refId":"J","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Active / Inactive","tooltip":{"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":"Bytes","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{"Apps":"#629E51","Buffers":"#614D93","Cache":"#6D1F62","Cached":"#511749","Committed":"#508642","Free":"#0A437C","Harware Corrupted - Amount of RAM that the kernel identified as corrupted / not 
working":"#CFFAFF","Inactive":"#584477","PageTables":"#0A50A1","Page_Tables":"#0A50A1","RAM_Free":"#E0F9D7","Slab":"#806EB7","Slab_Cache":"#E0752D","Swap":"#BF1B00","Swap_Cache":"#C15C17","Swap_Free":"#2F575E","Unused":"#EAB839"},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"fill":2,"gridPos":{"h":10,"w":12,"x":12,"y":63},"id":135,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":350,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":4,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"/.*Committed_AS - *./"},{"alias":"/.*CommitLimit - *./","color":"#BF1B00","fill":0}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"node_memory_Committed_AS_bytes{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"Committed_AS - Amount of memory presently allocated on the system","refId":"A","step":4},{"expr":"node_memory_CommitLimit_bytes{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"CommitLimit - Amount of memory currently available to be allocated on the system","refId":"M","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Commited","tooltip":{"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":"Bytes","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{"Apps":"#629E51","Buffers":"#614D93","Cache":"#6D1F62","Cached":"#511749","Committed":"#508642","Free":"#0A437C","Harware Corrupted - Amount of RAM that the kernel identified as corrupted / not 
working":"#CFFAFF","Inactive":"#584477","PageTables":"#0A50A1","Page_Tables":"#0A50A1","RAM_Free":"#E0F9D7","Slab":"#806EB7","Slab_Cache":"#E0752D","Swap":"#BF1B00","Swap_Cache":"#C15C17","Swap_Free":"#2F575E","Unused":"#EAB839"},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":73},"id":191,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":350,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":4,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":true,"steppedLine":false,"targets":[{"expr":"node_memory_Inactive_file_bytes{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"Inactive_file - File-backed memory on inactive LRU list","refId":"A","step":4},{"expr":"node_memory_Inactive_anon_bytes{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"Inactive_anon - Anonymous and swap cache on inactive LRU list, including tmpfs (shmem)","refId":"D","step":4},{"expr":"node_memory_Active_file_bytes{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"Active_file - File-backed memory on active LRU list","refId":"B","step":4},{"expr":"node_memory_Active_anon_bytes{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"Active_anon - Anonymous and swap cache on active least-recently-used (LRU) list, including tmpfs","refId":"C","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Active / Inactive 
Detail","tooltip":{"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":"Bytes","logBase":1,"max":null,"min":"0","show":true},{"format":"bytes","label":null,"logBase":1,"max":null,"min":null,"show":true}]},{"aliasColors":{"Active":"#99440A","Buffers":"#58140C","Cache":"#6D1F62","Cached":"#511749","Committed":"#508642","Dirty":"#6ED0E0","Free":"#B7DBAB","Inactive":"#EA6460","Mapped":"#052B51","PageTables":"#0A50A1","Page_Tables":"#0A50A1","Slab_Cache":"#EAB839","Swap":"#BF1B00","Swap_Cache":"#C15C17","Total":"#511749","Total RAM":"#052B51","Total RAM + Swap":"#052B51","Total Swap":"#614D93","VmallocUsed":"#EA6460"},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"fill":2,"gridPos":{"h":10,"w":12,"x":12,"y":73},"id":130,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":12,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"node_memory_Writeback_bytes{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"Writeback - Memory which is actively being written back to disk","refId":"J","step":4},{"expr":"node_memory_WritebackTmp_bytes{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"WritebackTmp - Memory used by FUSE for temporary writeback buffers","refId":"K","step":4},{"expr":"node_memory_Dirty_bytes{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"Dirty - Memory which is waiting to get written back to the 
disk","refId":"A","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Writeback and Dirty","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":"Bytes","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{"Apps":"#629E51","Buffers":"#614D93","Cache":"#6D1F62","Cached":"#511749","Committed":"#508642","Free":"#0A437C","Harware Corrupted - Amount of RAM that the kernel identified as corrupted / not working":"#CFFAFF","Inactive":"#584477","PageTables":"#0A50A1","Page_Tables":"#0A50A1","RAM_Free":"#E0F9D7","Slab":"#806EB7","Slab_Cache":"#E0752D","Swap":"#BF1B00","Swap_Cache":"#C15C17","Swap_Free":"#2F575E","Unused":"#EAB839"},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":83},"id":138,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":350,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":4,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"node_memory_Mapped_bytes{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"Mapped - Used memory in mapped pages files which have been mmaped, such as libraries","refId":"A","step":4},{"expr":"node_memory_Shmem_bytes{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"Shmem - Used shared memory (shared between several processes, thus including RAM disks)","refId":"B","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Shared and 
Mapped","tooltip":{"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":"Bytes","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{"Active":"#99440A","Buffers":"#58140C","Cache":"#6D1F62","Cached":"#511749","Committed":"#508642","Dirty":"#6ED0E0","Free":"#B7DBAB","Inactive":"#EA6460","Mapped":"#052B51","PageTables":"#0A50A1","Page_Tables":"#0A50A1","Slab_Cache":"#EAB839","Swap":"#BF1B00","Swap_Cache":"#C15C17","Total":"#511749","Total RAM":"#052B51","Total RAM + Swap":"#052B51","Total Swap":"#614D93","VmallocUsed":"#EA6460"},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"fill":2,"gridPos":{"h":10,"w":12,"x":12,"y":83},"id":131,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":12,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":true,"steppedLine":false,"targets":[{"expr":"node_memory_SUnreclaim_bytes{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"SUnreclaim - Part of Slab, that cannot be reclaimed on memory pressure","refId":"O","step":4},{"expr":"node_memory_SReclaimable_bytes{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"SReclaimable - Part of Slab, that might be reclaimed, such as caches","refId":"N","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory 
Slab","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":"Bytes","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{"Active":"#99440A","Buffers":"#58140C","Cache":"#6D1F62","Cached":"#511749","Committed":"#508642","Dirty":"#6ED0E0","Free":"#B7DBAB","Inactive":"#EA6460","Mapped":"#052B51","PageTables":"#0A50A1","Page_Tables":"#0A50A1","Slab_Cache":"#EAB839","Swap":"#BF1B00","Swap_Cache":"#C15C17","Total":"#511749","Total RAM":"#052B51","Total RAM + Swap":"#052B51","VmallocUsed":"#EA6460"},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":93},"id":70,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":4,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"node_memory_VmallocChunk_bytes{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"VmallocChunk - Largest contigious block of vmalloc area which is free","refId":"H","step":4},{"expr":"node_memory_VmallocTotal_bytes{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"VmallocTotal - Total size of vmalloc memory area","refId":"I","step":4},{"expr":"node_memory_VmallocUsed_bytes{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"VmallocUsed - Amount of vmalloc area which is 
used","refId":"O","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Vmalloc","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":"Bytes","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{"Apps":"#629E51","Buffers":"#614D93","Cache":"#6D1F62","Cached":"#511749","Committed":"#508642","Free":"#0A437C","Harware Corrupted - Amount of RAM that the kernel identified as corrupted / not working":"#CFFAFF","Inactive":"#584477","PageTables":"#0A50A1","Page_Tables":"#0A50A1","RAM_Free":"#E0F9D7","Slab":"#806EB7","Slab_Cache":"#E0752D","Swap":"#BF1B00","Swap_Cache":"#C15C17","Swap_Free":"#2F575E","Unused":"#EAB839"},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"fill":2,"gridPos":{"h":10,"w":12,"x":12,"y":93},"id":159,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":350,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":4,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"node_memory_Bounce_bytes{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"Bounce - Memory used for block device bounce buffers","refId":"N","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory 
Bounce","tooltip":{"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":"Bytes","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{"Active":"#99440A","Buffers":"#58140C","Cache":"#6D1F62","Cached":"#511749","Committed":"#508642","Dirty":"#6ED0E0","Free":"#B7DBAB","Inactive":"#EA6460","Mapped":"#052B51","PageTables":"#0A50A1","Page_Tables":"#0A50A1","Slab_Cache":"#EAB839","Swap":"#BF1B00","Swap_Cache":"#C15C17","Total":"#511749","Total RAM":"#052B51","Total RAM + Swap":"#052B51","VmallocUsed":"#EA6460"},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":103},"id":129,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":4,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"/.*Inactive *./","transform":"negative-Y"}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"node_memory_AnonHugePages_bytes{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"AnonHugePages - Memory in anonymous huge pages","refId":"D","step":4},{"expr":"node_memory_AnonPages_bytes{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"AnonPages - Memory in user pages not backed by files","refId":"G","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory 
Anonymous","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":"Bytes","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{"Apps":"#629E51","Buffers":"#614D93","Cache":"#6D1F62","Cached":"#511749","Committed":"#508642","Free":"#0A437C","Harware Corrupted - Amount of RAM that the kernel identified as corrupted / not working":"#CFFAFF","Inactive":"#584477","PageTables":"#0A50A1","Page_Tables":"#0A50A1","RAM_Free":"#E0F9D7","Slab":"#806EB7","Slab_Cache":"#E0752D","Swap":"#BF1B00","Swap_Cache":"#C15C17","Swap_Free":"#2F575E","Unused":"#EAB839"},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"fill":2,"gridPos":{"h":10,"w":12,"x":12,"y":103},"id":160,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":350,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":12,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"node_memory_KernelStack_bytes{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"KernelStack - Kernel memory stack. 
This is not reclaimable","refId":"N","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Kernel","tooltip":{"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":"Bytes","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{"Active":"#99440A","Buffers":"#58140C","Cache":"#6D1F62","Cached":"#511749","Committed":"#508642","Dirty":"#6ED0E0","Free":"#B7DBAB","Inactive":"#EA6460","Mapped":"#052B51","PageTables":"#0A50A1","Page_Tables":"#0A50A1","Slab_Cache":"#EAB839","Swap":"#BF1B00","Swap_Cache":"#C15C17","Total":"#511749","Total RAM":"#806EB7","Total RAM + Swap":"#806EB7","VmallocUsed":"#EA6460"},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":113},"id":140,"legend":{"alignAsTable":true,"avg":false,"current":true,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":4,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"node_memory_HugePages_Free{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"HugePages_Free - Huge pages in the pool that are not yet allocated","refId":"I","step":4},{"expr":"node_memory_HugePages_Rsvd{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"HugePages_Rsvd - Huge pages for which a commitment to allocate from the pool has been made, but no allocation has yet been 
made","refId":"J","step":4},{"expr":"node_memory_HugePages_Surp{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"HugePages_Surp - Huge pages in the pool above the value in /proc/sys/vm/nr_hugepages","refId":"K","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory HugePages Counter","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Pages","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":"","logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{"Active":"#99440A","Buffers":"#58140C","Cache":"#6D1F62","Cached":"#511749","Committed":"#508642","Dirty":"#6ED0E0","Free":"#B7DBAB","Inactive":"#EA6460","Mapped":"#052B51","PageTables":"#0A50A1","Page_Tables":"#0A50A1","Slab_Cache":"#EAB839","Swap":"#BF1B00","Swap_Cache":"#C15C17","Total":"#511749","Total RAM":"#806EB7","Total RAM + Swap":"#806EB7","VmallocUsed":"#EA6460"},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"fill":2,"gridPos":{"h":10,"w":12,"x":12,"y":113},"id":71,"legend":{"alignAsTable":true,"avg":false,"current":true,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":12,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"node_memory_HugePages_Total{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"HugePages - Total size of the pool of huge pages","refId":"L","step":4},{"expr":"node_memory_Hugepagesize_bytes{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"Hugepagesize - Huge Page 
size","refId":"D","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory HugePages Size","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":"Bytes","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":"","logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{"Active":"#99440A","Buffers":"#58140C","Cache":"#6D1F62","Cached":"#511749","Committed":"#508642","Dirty":"#6ED0E0","Free":"#B7DBAB","Inactive":"#EA6460","Mapped":"#052B51","PageTables":"#0A50A1","Page_Tables":"#0A50A1","Slab_Cache":"#EAB839","Swap":"#BF1B00","Swap_Cache":"#C15C17","Total":"#511749","Total RAM":"#052B51","Total RAM + Swap":"#052B51","VmallocUsed":"#EA6460"},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":123},"id":128,"legend":{"alignAsTable":true,"avg":true,"current":false,"hideEmpty":false,"hideZero":false,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":4,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"node_memory_DirectMap1G{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"DirectMap1G - Amount of pages mapped as this size","refId":"J","step":4},{"expr":"node_memory_DirectMap2M_bytes{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","interval":"","intervalFactor":2,"legendFormat":"DirectMap2M - Amount of pages mapped as this size","refId":"K","step":4},{"expr":"node_memory_DirectMap4k_bytes{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","interval":"","intervalFactor":2,"legendFormat":"DirectMap4K - Amount of pages 
mapped as this size","refId":"L","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory DirectMap","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":"Bytes","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{"Apps":"#629E51","Buffers":"#614D93","Cache":"#6D1F62","Cached":"#511749","Committed":"#508642","Free":"#0A437C","Harware Corrupted - Amount of RAM that the kernel identified as corrupted / not working":"#CFFAFF","Inactive":"#584477","PageTables":"#0A50A1","Page_Tables":"#0A50A1","RAM_Free":"#E0F9D7","Slab":"#806EB7","Slab_Cache":"#E0752D","Swap":"#BF1B00","Swap_Cache":"#C15C17","Swap_Free":"#2F575E","Unused":"#EAB839"},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"fill":2,"gridPos":{"h":10,"w":12,"x":12,"y":123},"id":137,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":350,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":4,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"node_memory_Unevictable_bytes{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"Unevictable - Amount of unevictable memory that can't be swapped out for a variety of reasons","refId":"P","step":4},{"expr":"node_memory_Mlocked_bytes{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"MLocked - Size of pages locked to memory using the mlock() system call","refId":"C","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Unevictable and 
MLocked","tooltip":{"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":"Bytes","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{"Active":"#99440A","Buffers":"#58140C","Cache":"#6D1F62","Cached":"#511749","Committed":"#508642","Dirty":"#6ED0E0","Free":"#B7DBAB","Inactive":"#EA6460","Mapped":"#052B51","PageTables":"#0A50A1","Page_Tables":"#0A50A1","Slab_Cache":"#EAB839","Swap":"#BF1B00","Swap_Cache":"#C15C17","Total":"#511749","Total RAM":"#052B51","Total RAM + Swap":"#052B51","Total Swap":"#614D93","VmallocUsed":"#EA6460"},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":133},"id":132,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":4,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"node_memory_NFS_Unstable_bytes{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"NFS Unstable - Memory in NFS pages sent to the server, but not yet commited to the storage","refId":"L","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory NFS","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":"Bytes","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"title":"Memory Detail 
Meminfo","type":"row"},{"collapsed":true,"gridPos":{"h":1,"w":24,"x":0,"y":27},"id":267,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":134},"id":176,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"rightSide":false,"show":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":4,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"/.*out/","transform":"negative-Y"}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_vmstat_pgpgin{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Pagesin - Page in operations","refId":"A","step":4},{"expr":"irate(node_vmstat_pgpgout{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Pagesout - Page out operations","refId":"B","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Pages In / Out","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Pages out (-) / in 
(+)","logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":2,"gridPos":{"h":10,"w":12,"x":12,"y":134},"id":22,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"rightSide":false,"show":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":4,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"/.*out/","transform":"negative-Y"}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_vmstat_pswpin{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Pswpin - Pages swapped in","refId":"A","step":4},{"expr":"irate(node_vmstat_pswpout{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Pswpout - Pages swapped out","refId":"B","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Pages Swap In / Out","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Pages out (-) / in (+)","logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{"Apps":"#629E51","Buffers":"#614D93","Cache":"#6D1F62","Cached":"#511749","Committed":"#508642","Free":"#0A437C","Harware Corrupted - Amount of RAM that the kernel identified as corrupted / not 
working":"#CFFAFF","Inactive":"#584477","PageTables":"#0A50A1","Page_Tables":"#0A50A1","RAM_Free":"#E0F9D7","Slab":"#806EB7","Slab_Cache":"#E0752D","Swap":"#BF1B00","Swap_Cache":"#C15C17","Swap_Free":"#2F575E","Unused":"#EAB839"},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":144},"id":197,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":350,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":4,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_vmstat_pgdeactivate{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Pgdeactivate - Pages moved from active to inactive","refId":"B","step":4},{"expr":"irate(node_vmstat_pgfree{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Pgfree - Page free operations","refId":"D","step":4},{"expr":"irate(node_vmstat_pgactivate{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Pgactivate - Pages moved from inactive to active","refId":"A","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Page Operations","tooltip":{"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Pages","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{"Apps":"#629E51","Buffers":"#614D93","Cache":"#6D1F62","Cached":"#511749","Committed":"#508642","Free":"#0A437C","Harware Corrupted - Amount of RAM that the kernel identified as corrupted / not 
working":"#CFFAFF","Inactive":"#584477","PageTables":"#0A50A1","Page_Tables":"#0A50A1","RAM_Free":"#E0F9D7","Slab":"#806EB7","Slab_Cache":"#E0752D","Swap":"#BF1B00","Swap_Cache":"#C15C17","Swap_Free":"#2F575E","Unused":"#EAB839"},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"fill":2,"gridPos":{"h":10,"w":12,"x":12,"y":144},"id":175,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":350,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":4,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"Pgfault - Page major and minor fault operations","fill":0,"stack":false}],"spaceLength":10,"stack":true,"steppedLine":false,"targets":[{"expr":"irate(node_vmstat_pgfault{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Pgfault - Page major and minor fault operations","refId":"C","step":4},{"expr":"irate(node_vmstat_pgmajfault{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Pgmajfault - Major page fault operations","refId":"F","step":4},{"expr":"irate(node_vmstat_pgfault{instance=~\"$node:$port\",job=~\"$job\"}[5m]) - irate(node_vmstat_pgmajfault{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Pgminfault - Minnor page fault operations","refId":"A","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Page 
Faults","tooltip":{"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Faults","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{"Apps":"#629E51","Buffers":"#614D93","Cache":"#6D1F62","Cached":"#511749","Committed":"#508642","Free":"#0A437C","Harware Corrupted - Amount of RAM that the kernel identified as corrupted / not working":"#CFFAFF","Inactive":"#584477","PageTables":"#0A50A1","Page_Tables":"#0A50A1","RAM_Free":"#E0F9D7","Slab":"#806EB7","Slab_Cache":"#E0752D","Swap":"#BF1B00","Swap_Cache":"#C15C17","Swap_Free":"#2F575E","Unused":"#EAB839"},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":154},"id":172,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":350,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":12,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_vmstat_kswapd_inodesteal{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Kswapd_inodesteal - Pages reclaimed via kswapd inode freeing","refId":"A","step":4},{"expr":"irate(node_vmstat_pginodesteal{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Pgindesteal - Pages reclaimed via inode freeing","refId":"B","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Pages 
Reclaimed","tooltip":{"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Pages","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{"Apps":"#629E51","Buffers":"#614D93","Cache":"#6D1F62","Cached":"#511749","Committed":"#508642","Free":"#0A437C","Harware Corrupted - Amount of RAM that the kernel identified as corrupted / not working":"#CFFAFF","Inactive":"#584477","PageTables":"#0A50A1","Page_Tables":"#0A50A1","RAM_Free":"#E0F9D7","Slab":"#806EB7","Slab_Cache":"#E0752D","Swap":"#BF1B00","Swap_Cache":"#C15C17","Swap_Free":"#2F575E","Unused":"#EAB839"},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"fill":2,"gridPos":{"h":10,"w":12,"x":12,"y":154},"id":184,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":350,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":4,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_vmstat_pageoutrun{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Pageoutrun - Kswapd calls to page reclaim","refId":"A","step":4},{"expr":"irate(node_vmstat_allocstall{instance=~\"$node:$port\",job=~\"$job\"}[5m])","intervalFactor":2,"legendFormat":"Allocstall - Direct reclaim calls","refId":"B","step":4},{"expr":"irate(node_vmstat_zone_reclaim_failed{instance=~\"$node:$port\",job=~\"$job\"}[5m])","intervalFactor":2,"legendFormat":"Zone_reclaim_failed - Zone reclaim failures","refId":"C","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Calls 
Reclaimed","tooltip":{"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Calls","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{"Apps":"#629E51","Buffers":"#614D93","Cache":"#6D1F62","Cached":"#511749","Committed":"#508642","Free":"#0A437C","Harware Corrupted - Amount of RAM that the kernel identified as corrupted / not working":"#CFFAFF","Inactive":"#584477","PageTables":"#0A50A1","Page_Tables":"#0A50A1","RAM_Free":"#E0F9D7","Slab":"#806EB7","Slab_Cache":"#E0752D","Swap":"#BF1B00","Swap_Cache":"#C15C17","Swap_Free":"#2F575E","Unused":"#EAB839"},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":164},"id":200,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":350,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":4,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_vmstat_pgrotated{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Pgrotated - Pages rotated to tail of the LRU","refId":"D","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Page 
Rotate","tooltip":{"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Pages","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{"Apps":"#629E51","Buffers":"#614D93","Cache":"#6D1F62","Cached":"#511749","Committed":"#508642","Free":"#0A437C","Harware Corrupted - Amount of RAM that the kernel identified as corrupted / not working":"#CFFAFF","Inactive":"#584477","PageTables":"#0A50A1","Page_Tables":"#0A50A1","RAM_Free":"#E0F9D7","Slab":"#806EB7","Slab_Cache":"#E0752D","Swap":"#BF1B00","Swap_Cache":"#C15C17","Swap_Free":"#2F575E","Unused":"#EAB839"},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"fill":2,"gridPos":{"h":10,"w":12,"x":12,"y":164},"id":170,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":350,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":12,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"node_vmstat_drop_pagecache{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"Drop_pagecache - Calls to drop page cache pages","refId":"N","step":4},{"expr":"node_vmstat_drop_slab{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"Drop_slab - Calls to drop slab cache pages","refId":"A","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Page 
Drop","tooltip":{"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Calls","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{"Apps":"#629E51","Buffers":"#614D93","Cache":"#6D1F62","Cached":"#511749","Committed":"#508642","Free":"#0A437C","Harware Corrupted - Amount of RAM that the kernel identified as corrupted / not working":"#CFFAFF","Inactive":"#584477","PageTables":"#0A50A1","Page_Tables":"#0A50A1","RAM_Free":"#E0F9D7","Slab":"#806EB7","Slab_Cache":"#E0752D","Swap":"#BF1B00","Swap_Cache":"#C15C17","Swap_Free":"#2F575E","Unused":"#EAB839"},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":174},"id":183,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":350,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":4,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_vmstat_slabs_scanned{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Slabs_scanned - Slab pages scanned","refId":"B","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Scan 
Slab","tooltip":{"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Pages","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{"Apps":"#629E51","Buffers":"#614D93","Cache":"#6D1F62","Cached":"#511749","Committed":"#508642","Free":"#0A437C","Harware Corrupted - Amount of RAM that the kernel identified as corrupted / not working":"#CFFAFF","Inactive":"#584477","PageTables":"#0A50A1","Page_Tables":"#0A50A1","RAM_Free":"#E0F9D7","Slab":"#806EB7","Slab_Cache":"#E0752D","Swap":"#BF1B00","Swap_Cache":"#C15C17","Swap_Free":"#2F575E","Unused":"#EAB839"},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"fill":2,"gridPos":{"h":10,"w":12,"x":12,"y":174},"id":181,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":350,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":12,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":true,"steppedLine":false,"targets":[{"expr":"irate(node_vmstat_unevictable_pgs_cleared{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Unevictable_pgs_cleared - Unevictable pages cleared","refId":"B","step":4},{"expr":"irate(node_vmstat_unevictable_pgs_culled{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Unevictable_pgs_culled - Unevictable pages culled","refId":"C","step":4},{"expr":"irate(node_vmstat_unevictable_pgs_mlocked{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Unevictable_pgs_mlocked - Unevictable pages 
mlocked","refId":"D","step":4},{"expr":"irate(node_vmstat_unevictable_pgs_munlocked{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Unevictable_pgs_munlocked - Unevictable pages munlocked","refId":"E","step":4},{"expr":"irate(node_vmstat_unevictable_pgs_rescued{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Unevictable_pgs_rescued- Unevictable pages rescued","refId":"F","step":4},{"expr":"irate(node_vmstat_unevictable_pgs_scanned{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Unevictable_pgs_scanned - Unevictable pages scanned","refId":"G","step":4},{"expr":"irate(node_vmstat_unevictable_pgs_stranded{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"unevictable_pgs_stranded - Unevictable pages stranded","refId":"H","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Unevictable Pages","tooltip":{"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Pages","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{"Apps":"#629E51","Buffers":"#614D93","Cache":"#6D1F62","Cached":"#511749","Committed":"#508642","Free":"#0A437C","Harware Corrupted - Amount of RAM that the kernel identified as corrupted / not 
working":"#CFFAFF","Inactive":"#584477","PageTables":"#0A50A1","Page_Tables":"#0A50A1","RAM_Free":"#E0F9D7","Slab":"#806EB7","Slab_Cache":"#E0752D","Swap":"#BF1B00","Swap_Cache":"#C15C17","Swap_Free":"#2F575E","Unused":"#EAB839"},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":184},"id":174,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":350,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":4,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_vmstat_pgalloc_dma{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Pgalloc_dma - Dma mem page allocations","refId":"A","step":4},{"expr":"irate(node_vmstat_pgalloc_dma32{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Pgalloc_dma32 - Dma32 mem page allocations","refId":"B","step":4},{"expr":"irate(node_vmstat_pgalloc_movable{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Pgalloc_movable - Movable mem page allocations","refId":"C","step":4},{"expr":"irate(node_vmstat_pgalloc_normal{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Pgalloc_normal - Normal mem page allocations","refId":"D","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Page 
Allocation","tooltip":{"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Pages","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{"Apps":"#629E51","Buffers":"#614D93","Cache":"#6D1F62","Cached":"#511749","Committed":"#508642","Free":"#0A437C","Harware Corrupted - Amount of RAM that the kernel identified as corrupted / not working":"#CFFAFF","Inactive":"#584477","PageTables":"#0A50A1","Page_Tables":"#0A50A1","RAM_Free":"#E0F9D7","Slab":"#806EB7","Slab_Cache":"#E0752D","Swap":"#BF1B00","Swap_Cache":"#C15C17","Swap_Free":"#2F575E","Unused":"#EAB839"},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"fill":2,"gridPos":{"h":10,"w":12,"x":12,"y":184},"id":177,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":350,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":4,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_vmstat_pgrefill_dma{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Pgrefill_dma - Dma mem pages inspected in refill_inactive_zone","refId":"B","step":4},{"expr":"irate(node_vmstat_pgrefill_dma32{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Pgrefill_dma32 - Dma32 mem pages inspected in refill_inactive_zone","refId":"C","step":4},{"expr":"irate(node_vmstat_pgrefill_movable{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Pgrefill_movable - Movable mem pages inspected in 
refill_inactive_zone","refId":"D","step":4},{"expr":"irate(node_vmstat_pgrefill_normal{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Pgrefill_normal - Normal mem pages inspected in refill_inactive_zone","refId":"E","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Page Refill","tooltip":{"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Pages","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{"Apps":"#629E51","Buffers":"#614D93","Cache":"#6D1F62","Cached":"#511749","Committed":"#508642","Free":"#0A437C","Harware Corrupted - Amount of RAM that the kernel identified as corrupted / not working":"#CFFAFF","Inactive":"#584477","PageTables":"#0A50A1","Page_Tables":"#0A50A1","RAM_Free":"#E0F9D7","Slab":"#806EB7","Slab_Cache":"#E0752D","Swap":"#BF1B00","Swap_Cache":"#C15C17","Swap_Free":"#2F575E","Unused":"#EAB839"},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":194},"id":179,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":350,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":4,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_vmstat_pgsteal_direct_dma{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Pgsteal_direct_dma - Dma mem pages 
stealed","refId":"B","step":4},{"expr":"irate(node_vmstat_pgsteal_direct_dma32{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Pgsteal_direct_dma32 - Dma32 mem pages scanned","refId":"C","step":4},{"expr":"irate(node_vmstat_pgsteal_direct_movable{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Pgsteal_direct_movable - Movable mem pages scanned","refId":"D","step":4},{"expr":"irate(node_vmstat_pgsteal_direct_normal{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Pgsteal_direct_normal - Normal mem pages scanned","refId":"E","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Page Steal Direct","tooltip":{"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Pages","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{"Apps":"#629E51","Buffers":"#614D93","Cache":"#6D1F62","Cached":"#511749","Committed":"#508642","Free":"#0A437C","Harware Corrupted - Amount of RAM that the kernel identified as corrupted / not 
working":"#CFFAFF","Inactive":"#584477","PageTables":"#0A50A1","Page_Tables":"#0A50A1","RAM_Free":"#E0F9D7","Slab":"#806EB7","Slab_Cache":"#E0752D","Swap":"#BF1B00","Swap_Cache":"#C15C17","Swap_Free":"#2F575E","Unused":"#EAB839"},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"fill":2,"gridPos":{"h":10,"w":12,"x":12,"y":194},"id":198,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":350,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":4,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_vmstat_pgsteal_kswapd_dma{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Pgsteal_kswapd_dma - Dma mem pages scanned by kswapd","refId":"F","step":4},{"expr":"irate(node_vmstat_pgsteal_kswapd_dma32{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Pgsteal_kswapd_dma32 - Dma32 mem pages scanned by kswapd","refId":"G","step":4},{"expr":"irate(node_vmstat_pgsteal_kswapd_movable{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Pgsteal_kswapd_movable - Movable mem pages scanned by kswapd","refId":"H","step":4},{"expr":"irate(node_vmstat_pgsteal_kswapd_normal{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Pgsteal_kswapd_normal - Normal mem pages scanned by kswapd","refId":"I","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Page Steal 
Kswapd","tooltip":{"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Pages","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{"Apps":"#629E51","Buffers":"#614D93","Cache":"#6D1F62","Cached":"#511749","Committed":"#508642","Free":"#0A437C","Harware Corrupted - Amount of RAM that the kernel identified as corrupted / not working":"#CFFAFF","Inactive":"#584477","PageTables":"#0A50A1","Page_Tables":"#0A50A1","RAM_Free":"#E0F9D7","Slab":"#806EB7","Slab_Cache":"#E0752D","Swap":"#BF1B00","Swap_Cache":"#C15C17","Swap_Free":"#2F575E","Unused":"#EAB839"},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":204},"id":192,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":350,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":12,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":true,"steppedLine":false,"targets":[{"expr":"irate(node_vmstat_pgscan_direct_dma{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Pgscan_direct_dma - Dma mem pages scanned","refId":"A","step":4},{"expr":"irate(node_vmstat_pgscan_direct_dma32{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Pgscan_direct_dma32 - Dma32 mem pages scanned","refId":"C","step":4},{"expr":"irate(node_vmstat_pgscan_direct_movable{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Pgscan_direct_movable - Movable mem pages 
scanned","refId":"D","step":4},{"expr":"irate(node_vmstat_pgscan_direct_normal{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Pgscan_direct_normal - Normal mem pages scanned","refId":"E","step":4},{"expr":"irate(node_vmstat_pgscan_direct_throttle{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","hide":true,"intervalFactor":2,"legendFormat":"Pgscan_direct_throttle - ","refId":"F","step":2}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Scan Direct","tooltip":{"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Pages","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{"Apps":"#629E51","Buffers":"#614D93","Cache":"#6D1F62","Cached":"#511749","Committed":"#508642","Free":"#0A437C","Harware Corrupted - Amount of RAM that the kernel identified as corrupted / not 
working":"#CFFAFF","Inactive":"#584477","PageTables":"#0A50A1","Page_Tables":"#0A50A1","RAM_Free":"#E0F9D7","Slab":"#806EB7","Slab_Cache":"#E0752D","Swap":"#BF1B00","Swap_Cache":"#C15C17","Swap_Free":"#2F575E","Unused":"#EAB839"},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"fill":2,"gridPos":{"h":10,"w":12,"x":12,"y":204},"id":178,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":350,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":12,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":true,"steppedLine":false,"targets":[{"expr":"irate(node_vmstat_pgscan_kswapd_dma{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Pgscan_kswapd_dma - Dma mem pages scanned by kswapd","refId":"B","step":4},{"expr":"irate(node_vmstat_pgscan_kswapd_dma32{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Pgscan_kswapd_dma32 - Dma32 mem pages scanned by kswapd","refId":"G","step":4},{"expr":"irate(node_vmstat_pgscan_kswapd_movable{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Pgscan_kswapd_movable - Movable mem pages scanned by kswapd","refId":"H","step":4},{"expr":"irate(node_vmstat_pgscan_kswapd_normal{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Pgscan_kswapd_normal - Normal mem pages scanned by kswapd","refId":"I","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Scan 
Kswapd","tooltip":{"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Pages","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{"Active":"#99440A","Buffers":"#58140C","Cache":"#6D1F62","Cached":"#511749","Committed":"#508642","Dirty":"#6ED0E0","Free":"#B7DBAB","Inactive":"#EA6460","Mapped":"#052B51","PageTables":"#0A50A1","Page_Tables":"#0A50A1","Slab_Cache":"#EAB839","Swap":"#BF1B00","Swap_Cache":"#C15C17","Total":"#511749","Total RAM":"#052B51","Total RAM + Swap":"#052B51","Total Swap":"#614D93","VmallocUsed":"#EA6460"},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":214},"id":169,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":12,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"/.*fail*./","color":"#890F02"}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_vmstat_compact_free_scanned{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Compact_free_scanned - Pages scanned for freeing by compaction daemon","refId":"B","step":4},{"expr":"irate(node_vmstat_compact_isolated{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Compact_isolated - Page isolations for memory compaction","refId":"C","step":4},{"expr":"irate(node_vmstat_compact_migrate_scanned{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Compact_migrate_scanned - Pages scanned for migration by 
compaction daemon","refId":"D","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Page Compact","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Pages","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{"Active":"#99440A","Buffers":"#58140C","Cache":"#6D1F62","Cached":"#511749","Committed":"#508642","Dirty":"#6ED0E0","Free":"#B7DBAB","Inactive":"#EA6460","Mapped":"#052B51","PageTables":"#0A50A1","Page_Tables":"#0A50A1","Slab_Cache":"#EAB839","Swap":"#BF1B00","Swap_Cache":"#C15C17","Total":"#511749","Total RAM":"#052B51","Total RAM + Swap":"#052B51","Total Swap":"#614D93","VmallocUsed":"#EA6460"},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"fill":2,"gridPos":{"h":10,"w":12,"x":12,"y":214},"id":189,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":12,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"/.*fail*./","color":"#890F02"}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_vmstat_compact_fail{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Compact_fail - Unsuccessful compactions for high order allocations","refId":"A","step":4},{"expr":"irate(node_vmstat_compact_stall{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Compact_stall - Failures to even start 
compacting","refId":"E","step":4},{"expr":"irate(node_vmstat_compact_success{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Compact_sucess - Successful compactions for high order allocations","refId":"F","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Compactions","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Compactions","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{"Apps":"#629E51","Buffers":"#614D93","Cache":"#6D1F62","Cached":"#511749","Committed":"#508642","Free":"#0A437C","Harware Corrupted - Amount of RAM that the kernel identified as corrupted / not working":"#CFFAFF","Inactive":"#584477","PageTables":"#0A50A1","Page_Tables":"#0A50A1","RAM_Free":"#E0F9D7","Slab":"#806EB7","Slab_Cache":"#E0752D","Swap":"#BF1B00","Swap_Cache":"#C15C17","Swap_Free":"#2F575E","Unused":"#EAB839"},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":224},"id":190,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":350,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":12,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"node_vmstat_kswapd_high_wmark_hit_quickly{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"Kswapd_high_wmark_hit_quickly - Times high watermark reached 
quickly","refId":"N","step":4},{"expr":"node_vmstat_kswapd_low_wmark_hit_quickly{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"Kswapd_low_wmark_hit_quickly - Times low watermark reached quickly","refId":"B","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Kswapd Watermark","tooltip":{"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Counter","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{"Apps":"#629E51","Buffers":"#614D93","Cache":"#6D1F62","Cached":"#511749","Committed":"#508642","Free":"#0A437C","Harware Corrupted - Amount of RAM that the kernel identified as corrupted / not working":"#CFFAFF","Inactive":"#584477","PageTables":"#0A50A1","Page_Tables":"#0A50A1","RAM_Free":"#E0F9D7","Slab":"#806EB7","Slab_Cache":"#E0752D","Swap":"#BF1B00","Swap_Cache":"#C15C17","Swap_Free":"#2F575E","Unused":"#EAB839"},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"fill":2,"gridPos":{"h":10,"w":12,"x":12,"y":224},"id":171,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":350,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":12,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"node_vmstat_htlb_buddy_alloc_fail{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"Htlb_buddy_alloc_fail - Huge TLB page buddy allocation 
failures","refId":"N","step":4},{"expr":"node_vmstat_htlb_buddy_alloc_success{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"Htlb_buddy_alloc_success - Huge TLB page buddy allocation successes","refId":"A","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Buddy Alloc","tooltip":{"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Allocations","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{"Apps":"#629E51","Buffers":"#614D93","Cache":"#6D1F62","Cached":"#511749","Committed":"#508642","Free":"#0A437C","Harware Corrupted - Amount of RAM that the kernel identified as corrupted / not working":"#CFFAFF","Inactive":"#584477","PageTables":"#0A50A1","Page_Tables":"#0A50A1","RAM_Free":"#E0F9D7","Slab":"#806EB7","Slab_Cache":"#E0752D","Swap":"#BF1B00","Swap_Cache":"#C15C17","Swap_Free":"#2F575E","Unused":"#EAB839"},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":234},"id":173,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":350,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":4,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_vmstat_numa_foreign{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Numa_foreign - Foreign NUMA zone 
allocations","refId":"A","step":4},{"expr":"irate(node_vmstat_numa_hit{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Numa_hit - Successful allocations from preferred NUMA zone","refId":"D","step":4},{"expr":"irate(node_vmstat_numa_interleave{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Numa_interleave - Interleaved NUMA allocations in each zone for each NUMA node","refId":"F","step":4},{"expr":"irate(node_vmstat_numa_local{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Numa_local - Successful allocations from local NUMA zone","refId":"G","step":4},{"expr":"irate(node_vmstat_numa_miss{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Numa_miss - Unsuccessful allocations from preferred NUMA zona","refId":"H","step":4},{"expr":"irate(node_vmstat_numa_other{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Numa_other - Unsuccessful allocations from local NUMA zone","refId":"I","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Numa Allocations","tooltip":{"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Allocations","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{"Apps":"#629E51","Buffers":"#614D93","Cache":"#6D1F62","Cached":"#511749","Committed":"#508642","Free":"#0A437C","Harware Corrupted - Amount of RAM that the kernel identified as corrupted / not 
working":"#CFFAFF","Inactive":"#584477","PageTables":"#0A50A1","Page_Tables":"#0A50A1","RAM_Free":"#E0F9D7","Slab":"#806EB7","Slab_Cache":"#E0752D","Swap":"#BF1B00","Swap_Cache":"#C15C17","Swap_Free":"#2F575E","Unused":"#EAB839"},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"fill":2,"gridPos":{"h":10,"w":12,"x":12,"y":234},"id":193,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":350,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":12,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"/.*Numa_pages_migrated - *./","fill":0,"stack":false}],"spaceLength":10,"stack":true,"steppedLine":false,"targets":[{"expr":"irate(node_vmstat_numa_pages_migrated{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Numa_pages_migrated - NUMA page migrations","refId":"J","step":4},{"expr":"irate(node_vmstat_pgmigrate_fail{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Pgmigrate_fail - Unsuccessful NUMA page migrations","refId":"A","step":4},{"expr":"irate(node_vmstat_pgmigrate_success{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Pgmigrate_success - Successful NUMA page migrations","refId":"B","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Numa Page 
Migrations","tooltip":{"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Pages","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{"Apps":"#629E51","Buffers":"#614D93","Cache":"#6D1F62","Cached":"#511749","Committed":"#508642","Free":"#0A437C","Harware Corrupted - Amount of RAM that the kernel identified as corrupted / not working":"#CFFAFF","Inactive":"#584477","PageTables":"#0A50A1","Page_Tables":"#0A50A1","RAM_Free":"#E0F9D7","Slab":"#806EB7","Slab_Cache":"#E0752D","Swap":"#BF1B00","Swap_Cache":"#C15C17","Swap_Free":"#2F575E","Unused":"#EAB839"},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":244},"id":194,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":350,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":12,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_vmstat_numa_hint_faults{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Numa_hint_faults - NUMA hint faults trapped","refId":"B","step":4},{"expr":"irate(node_vmstat_numa_hint_faults_local{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Numa_hint_faults_local - Hinting faults to local nodes","refId":"C","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Numa 
Hints","tooltip":{"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"HInts","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{"Apps":"#629E51","Buffers":"#614D93","Cache":"#6D1F62","Cached":"#511749","Committed":"#508642","Free":"#0A437C","Harware Corrupted - Amount of RAM that the kernel identified as corrupted / not working":"#CFFAFF","Inactive":"#584477","PageTables":"#0A50A1","Page_Tables":"#0A50A1","RAM_Free":"#E0F9D7","Slab":"#806EB7","Slab_Cache":"#E0752D","Swap":"#BF1B00","Swap_Cache":"#C15C17","Swap_Free":"#2F575E","Unused":"#EAB839"},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"fill":2,"gridPos":{"h":10,"w":12,"x":12,"y":244},"id":196,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":350,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":12,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_vmstat_numa_pte_updates{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Numa_pte_updates - NUMA page table entry updates","refId":"K","step":4},{"expr":"irate(node_vmstat_numa_huge_pte_updates{instance=~\"$node:$port\",job=~\"$job\"}[5m])","intervalFactor":2,"legendFormat":"Numa_huge_pte_updates - NUMA huge page table entry updates","refId":"A","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Numa Table 
Updates","tooltip":{"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Updates","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{"Apps":"#629E51","Buffers":"#614D93","Cache":"#6D1F62","Cached":"#511749","Committed":"#508642","Free":"#0A437C","Harware Corrupted - Amount of RAM that the kernel identified as corrupted / not working":"#CFFAFF","Inactive":"#584477","PageTables":"#0A50A1","Page_Tables":"#0A50A1","RAM_Free":"#E0F9D7","Slab":"#806EB7","Slab_Cache":"#E0752D","Swap":"#BF1B00","Swap_Cache":"#C15C17","Swap_Free":"#2F575E","Unused":"#EAB839"},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":254},"id":199,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":350,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":4,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_vmstat_thp_split{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Thp_split - Transparent huge page splits","refId":"F","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory THP 
Splits","tooltip":{"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Splits","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{"Apps":"#629E51","Buffers":"#614D93","Cache":"#6D1F62","Cached":"#511749","Committed":"#508642","Free":"#0A437C","Harware Corrupted - Amount of RAM that the kernel identified as corrupted / not working":"#CFFAFF","Inactive":"#584477","PageTables":"#0A50A1","Page_Tables":"#0A50A1","RAM_Free":"#E0F9D7","Slab":"#806EB7","Slab_Cache":"#E0752D","Swap":"#BF1B00","Swap_Cache":"#C15C17","Swap_Free":"#2F575E","Unused":"#EAB839"},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"fill":2,"gridPos":{"h":10,"w":12,"x":12,"y":254},"id":182,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":350,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":4,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_vmstat_workingset_activate{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Workingset_activate - Page activations to form the working set","refId":"C","step":4},{"expr":"irate(node_vmstat_workingset_nodereclaim{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Workingset_nodereclaim - NUMA node working set page reclaims","refId":"D","step":4},{"expr":"irate(node_vmstat_workingset_refault{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Workingset_refault - Refaults of previously evicted 
pages","refId":"E","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Workingset","tooltip":{"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Counter","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{"Apps":"#629E51","Buffers":"#614D93","Cache":"#6D1F62","Cached":"#511749","Committed":"#508642","Free":"#0A437C","Harware Corrupted - Amount of RAM that the kernel identified as corrupted / not working":"#CFFAFF","Inactive":"#584477","PageTables":"#0A50A1","Page_Tables":"#0A50A1","RAM_Free":"#E0F9D7","Slab":"#806EB7","Slab_Cache":"#E0752D","Swap":"#BF1B00","Swap_Cache":"#C15C17","Swap_Free":"#2F575E","Unused":"#EAB839"},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":264},"id":180,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":350,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":4,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_vmstat_thp_collapse_alloc{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Thp_collapse_alloc - Transparent huge page collapse allocations","refId":"A","step":4},{"expr":"irate(node_vmstat_thp_collapse_alloc_failed{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Thp_collapse_alloc_failed - Transparent huge page collapse allocation 
failures","refId":"C","step":4},{"expr":"irate(node_vmstat_thp_zero_page_alloc{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Thp_zero_page_alloc - Transparent huge page zeroed page allocations","refId":"G","step":4},{"expr":"irate(node_vmstat_thp_zero_page_alloc_failed{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Thp_zero_page_alloc_failed - Transparent huge page zeroed page allocation failures","refId":"H","step":4},{"expr":"irate(node_vmstat_thp_fault_alloc{instance=~\"$node:$port\",job=~\"$job\"}[5m])","intervalFactor":2,"legendFormat":"Thp_fault_alloc - Transparent huge page fault allocations","refId":"B","step":4},{"expr":"irate(node_vmstat_thp_fault_fallback{instance=~\"$node:$port\",job=~\"$job\"}[5m])","intervalFactor":2,"legendFormat":"Thp_fault_fallback - Transparent huge page fault fallbacks","refId":"D","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory THP Allocations","tooltip":{"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Allocations","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"title":"Memory Detail Vmstat","type":"row"},{"collapsed":true,"gridPos":{"h":1,"w":24,"x":0,"y":28},"id":268,"panels":[{"aliasColors":{"Apps":"#629E51","Buffers":"#614D93","Cache":"#6D1F62","Cached":"#511749","Committed":"#508642","Free":"#0A437C","Harware Corrupted - Amount of RAM that the kernel identified as corrupted / not 
working":"#CFFAFF","Inactive":"#584477","PageTables":"#0A50A1","Page_Tables":"#0A50A1","RAM_Free":"#E0F9D7","Slab":"#806EB7","Slab_Cache":"#E0752D","Swap":"#BF1B00","Swap_Cache":"#C15C17","Swap_Free":"#2F575E","Unused":"#EAB839"},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":265},"id":185,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":350,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":4,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"node_vmstat_nr_active_anon{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"Active_anon - Active anonymous memory pages","refId":"B","step":240},{"expr":"node_vmstat_nr_active_file{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"Active_file - Active file memory memory pages","refId":"C","step":240}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Page Active","tooltip":{"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Pages","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{"Apps":"#629E51","Buffers":"#614D93","Cache":"#6D1F62","Cached":"#511749","Committed":"#508642","Free":"#0A437C","Harware Corrupted - Amount of RAM that the kernel identified as corrupted / not 
working":"#CFFAFF","Inactive":"#584477","PageTables":"#0A50A1","Page_Tables":"#0A50A1","RAM_Free":"#E0F9D7","Slab":"#806EB7","Slab_Cache":"#E0752D","Swap":"#BF1B00","Swap_Cache":"#C15C17","Swap_Free":"#2F575E","Unused":"#EAB839"},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"fill":2,"gridPos":{"h":10,"w":12,"x":12,"y":265},"id":228,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":350,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":4,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"node_vmstat_nr_inactive_anon{instance=~\"$node:$port\",job=~\"$job\"}","intervalFactor":2,"legendFormat":"Inactive_anon - Inactive anonymous memory pages in each zone for each NUMA node","refId":"A","step":240},{"expr":"node_vmstat_nr_inactive_file{instance=~\"$node:$port\",job=~\"$job\"}","intervalFactor":2,"legendFormat":"Inactive_file - Inactive file memory pages in each zone for each NUMA node","refId":"D","step":240}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Page Inactive","tooltip":{"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Pages","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{"Apps":"#629E51","Buffers":"#614D93","Cache":"#6D1F62","Cached":"#511749","Committed":"#508642","Free":"#0A437C","Harware Corrupted - Amount of RAM that the kernel identified as corrupted / not 
working":"#CFFAFF","Inactive":"#584477","PageTables":"#0A50A1","Page_Tables":"#0A50A1","RAM_Free":"#E0F9D7","Slab":"#806EB7","Slab_Cache":"#E0752D","Swap":"#BF1B00","Swap_Cache":"#C15C17","Swap_Free":"#2F575E","Unused":"#EAB839"},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":275},"id":188,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":350,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":4,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"node_vmstat_nr_slab_reclaimable{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"Reclaimable - Instantaneous reclaimable slab pages","refId":"A","step":240},{"expr":"node_vmstat_nr_slab_unreclaimable{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"Unreclaimable - Instantaneous unreclaimable slab pages","refId":"C","step":240}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Page Reclaimed / Unreclaimed","tooltip":{"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Pages","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{"Apps":"#629E51","Buffers":"#614D93","Cache":"#6D1F62","Cached":"#511749","Committed":"#508642","Free":"#0A437C","Harware Corrupted - Amount of RAM that the kernel identified as corrupted / not 
working":"#CFFAFF","Inactive":"#584477","PageTables":"#0A50A1","Page_Tables":"#0A50A1","RAM_Free":"#E0F9D7","Slab":"#806EB7","Slab_Cache":"#E0752D","Swap":"#BF1B00","Swap_Cache":"#C15C17","Swap_Free":"#2F575E","Unused":"#EAB839"},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"fill":2,"gridPos":{"h":10,"w":12,"x":12,"y":275},"id":186,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":350,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":4,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"node_vmstat_nr_free_pages{instance=~\"$node:$port\",job=~\"$job\"}","intervalFactor":2,"legendFormat":"Free_pages - Free pages","refId":"B","step":240},{"expr":"node_vmstat_nr_written{instance=~\"$node:$port\",job=~\"$job\"}","intervalFactor":2,"legendFormat":"Written - Pages written out in each zone for each NUMA node","refId":"A","step":240}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Page Free / Written","tooltip":{"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Pages","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{"Apps":"#629E51","Buffers":"#614D93","Cache":"#6D1F62","Cached":"#511749","Committed":"#508642","Free":"#0A437C","Harware Corrupted - Amount of RAM that the kernel identified as corrupted / not 
working":"#CFFAFF","Inactive":"#584477","PageTables":"#0A50A1","Page_Tables":"#0A50A1","RAM_Free":"#E0F9D7","Slab":"#806EB7","Slab_Cache":"#E0752D","Swap":"#BF1B00","Swap_Cache":"#C15C17","Swap_Free":"#2F575E","Unused":"#EAB839"},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":285},"id":218,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":350,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":4,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"node_vmstat_nr_dirty{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"Dirty - Pages in dirty state","refId":"C","step":240},{"expr":"node_vmstat_nr_bounce{instance=~\"$node:$port\",job=~\"$job\"}","intervalFactor":2,"legendFormat":"Bounce - Bounce buffer pages","refId":"A","step":240}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Page Dirty / Bounce","tooltip":{"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Pages","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{"Apps":"#629E51","Buffers":"#614D93","Cache":"#6D1F62","Cached":"#511749","Committed":"#508642","Free":"#0A437C","Harware Corrupted - Amount of RAM that the kernel identified as corrupted / not 
working":"#CFFAFF","Inactive":"#584477","PageTables":"#0A50A1","Page_Tables":"#0A50A1","RAM_Free":"#E0F9D7","Slab":"#806EB7","Slab_Cache":"#E0752D","Swap":"#BF1B00","Swap_Cache":"#C15C17","Swap_Free":"#2F575E","Unused":"#EAB839"},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"fill":2,"gridPos":{"h":10,"w":12,"x":12,"y":285},"id":201,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":350,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":4,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"node_vmstat_nr_unevictable{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"Unevictable - Unevictable pages","refId":"B","step":240},{"expr":"node_vmstat_nr_mlock{instance=~\"$node:$port\",job=~\"$job\"}","intervalFactor":2,"legendFormat":"Mlock - Pages under mlock","refId":"A","step":240}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Pages Unevictable / Mlock","tooltip":{"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Pages","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{"Apps":"#629E51","Buffers":"#614D93","Cache":"#6D1F62","Cached":"#511749","Committed":"#508642","Free":"#0A437C","Harware Corrupted - Amount of RAM that the kernel identified as corrupted / not 
working":"#CFFAFF","Inactive":"#584477","PageTables":"#0A50A1","Page_Tables":"#0A50A1","RAM_Free":"#E0F9D7","Slab":"#806EB7","Slab_Cache":"#E0752D","Swap":"#BF1B00","Swap_Cache":"#C15C17","Swap_Free":"#2F575E","Unused":"#EAB839"},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":295},"id":214,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":350,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":4,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"node_vmstat_nr_shmem{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"Shmem - Shared memory pages","refId":"H","step":240},{"expr":"node_vmstat_nr_mapped{instance=~\"$node:$port\",job=~\"$job\"}","intervalFactor":2,"legendFormat":"Mapped - Mapped pagecache pages in each zone for each NUMA node","refId":"A","step":240}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Page Shmem / Mapped","tooltip":{"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Pages","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{"Apps":"#629E51","Buffers":"#614D93","Cache":"#6D1F62","Cached":"#511749","Committed":"#508642","Free":"#0A437C","Harware Corrupted - Amount of RAM that the kernel identified as corrupted / not 
working":"#CFFAFF","Inactive":"#584477","PageTables":"#0A50A1","Page_Tables":"#0A50A1","RAM_Free":"#E0F9D7","Slab":"#806EB7","Slab_Cache":"#E0752D","Swap":"#BF1B00","Swap_Cache":"#C15C17","Swap_Free":"#2F575E","Unused":"#EAB839"},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"fill":2,"gridPos":{"h":10,"w":12,"x":12,"y":295},"id":212,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":350,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":4,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"node_vmstat_nr_kernel_stack{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"Kernel_stack - Pages of kernel stack","refId":"F","step":240}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Page Kernel_stack","tooltip":{"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Pages","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{"Apps":"#629E51","Buffers":"#614D93","Cache":"#6D1F62","Cached":"#511749","Committed":"#508642","Free":"#0A437C","Harware Corrupted - Amount of RAM that the kernel identified as corrupted / not 
working":"#CFFAFF","Inactive":"#584477","PageTables":"#0A50A1","Page_Tables":"#0A50A1","RAM_Free":"#E0F9D7","Slab":"#806EB7","Slab_Cache":"#E0752D","Swap":"#BF1B00","Swap_Cache":"#C15C17","Swap_Free":"#2F575E","Unused":"#EAB839"},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":305},"id":203,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":350,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":4,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"node_vmstat_nr_writeback{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"Writeback - Writeback pages","refId":"G","step":240},{"expr":"node_vmstat_nr_writeback_temp{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"Writeback_temp - Temporary writeback pages","refId":"H","step":240}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Pages Writeback","tooltip":{"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Pages","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{"Apps":"#629E51","Buffers":"#614D93","Cache":"#6D1F62","Cached":"#511749","Committed":"#508642","Free":"#0A437C","Harware Corrupted - Amount of RAM that the kernel identified as corrupted / not 
working":"#CFFAFF","Inactive":"#584477","PageTables":"#0A50A1","Page_Tables":"#0A50A1","RAM_Free":"#E0F9D7","Slab":"#806EB7","Slab_Cache":"#E0752D","Swap":"#BF1B00","Swap_Cache":"#C15C17","Swap_Free":"#2F575E","Unused":"#EAB839"},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"fill":2,"gridPos":{"h":10,"w":12,"x":12,"y":305},"id":205,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":350,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":4,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"node_vmstat_nr_file_pages{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"File_pages - File pagecache pages in each zone for each NUMA node","refId":"F","step":240}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Page File_pages","tooltip":{"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Pages","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{"Apps":"#629E51","Buffers":"#614D93","Cache":"#6D1F62","Cached":"#511749","Committed":"#508642","Free":"#0A437C","Harware Corrupted - Amount of RAM that the kernel identified as corrupted / not 
working":"#CFFAFF","Inactive":"#584477","PageTables":"#0A50A1","Page_Tables":"#0A50A1","RAM_Free":"#E0F9D7","Slab":"#806EB7","Slab_Cache":"#E0752D","Swap":"#BF1B00","Swap_Cache":"#C15C17","Swap_Free":"#2F575E","Unused":"#EAB839"},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":315},"id":206,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":350,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":4,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"node_vmstat_nr_dirty_background_threshold{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"Dirty_background_threshold - Background writeback threshold","refId":"D","step":240},{"expr":"node_vmstat_nr_dirty_threshold{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"Dirty_threshold - Dirty throttling threshold","refId":"E","step":240}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Page Dirty Threshold","tooltip":{"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Pages","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{"Apps":"#629E51","Buffers":"#614D93","Cache":"#6D1F62","Cached":"#511749","Committed":"#508642","Free":"#0A437C","Harware Corrupted - Amount of RAM that the kernel identified as corrupted / not 
working":"#CFFAFF","Inactive":"#584477","PageTables":"#0A50A1","Page_Tables":"#0A50A1","RAM_Free":"#E0F9D7","Slab":"#806EB7","Slab_Cache":"#E0752D","Swap":"#BF1B00","Swap_Cache":"#C15C17","Swap_Free":"#2F575E","Unused":"#EAB839"},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"fill":2,"gridPos":{"h":10,"w":12,"x":12,"y":315},"id":208,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":350,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":4,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"node_vmstat_nr_unstable{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"Unstable - Pages unstable state in each zone for each NUMA node","refId":"D","step":240},{"expr":"node_vmstat_nr_dirtied{instance=~\"$node:$port\",job=~\"$job\"}","intervalFactor":2,"legendFormat":"Dirtied - Pages entering dirty state in each zone for each NUMA node","refId":"A","step":240}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Page Unstable / Dirtied","tooltip":{"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Pages","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{"Apps":"#629E51","Buffers":"#614D93","Cache":"#6D1F62","Cached":"#511749","Committed":"#508642","Free":"#0A437C","Harware Corrupted - Amount of RAM that the kernel identified as corrupted / not 
working":"#CFFAFF","Inactive":"#584477","PageTables":"#0A50A1","Page_Tables":"#0A50A1","RAM_Free":"#E0F9D7","Slab":"#806EB7","Slab_Cache":"#E0752D","Swap":"#BF1B00","Swap_Cache":"#C15C17","Swap_Free":"#2F575E","Unused":"#EAB839"},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":325},"id":209,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":350,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":4,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"node_vmstat_nr_page_table_pages{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"Page_table_pages - Page table pages in each zone for each NUMA node","refId":"A","step":240}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Page Page_table_pages","tooltip":{"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Pages","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{"Apps":"#629E51","Buffers":"#614D93","Cache":"#6D1F62","Cached":"#511749","Committed":"#508642","Free":"#0A437C","Harware Corrupted - Amount of RAM that the kernel identified as corrupted / not 
working":"#CFFAFF","Inactive":"#584477","PageTables":"#0A50A1","Page_Tables":"#0A50A1","RAM_Free":"#E0F9D7","Slab":"#806EB7","Slab_Cache":"#E0752D","Swap":"#BF1B00","Swap_Cache":"#C15C17","Swap_Free":"#2F575E","Unused":"#EAB839"},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"fill":2,"gridPos":{"h":10,"w":12,"x":12,"y":325},"id":217,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":350,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":4,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"node_vmstat_nr_alloc_batch{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"Alloc_batch - Pages allocated to other zones due to insufficient memory for each zone for each NUMA node","refId":"D","step":240}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Page Alloc_batch","tooltip":{"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Pages","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{"Apps":"#629E51","Buffers":"#614D93","Cache":"#6D1F62","Cached":"#511749","Committed":"#508642","Free":"#0A437C","Harware Corrupted - Amount of RAM that the kernel identified as corrupted / not 
working":"#CFFAFF","Inactive":"#584477","PageTables":"#0A50A1","Page_Tables":"#0A50A1","RAM_Free":"#E0F9D7","Slab":"#806EB7","Slab_Cache":"#E0752D","Swap":"#BF1B00","Swap_Cache":"#C15C17","Swap_Free":"#2F575E","Unused":"#EAB839"},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":335},"id":213,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":350,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":4,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"node_vmstat_nr_isolated_anon{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"Isolated_anon - Isolated anonymous memory pages in each zone for each NUMA node","refId":"D","step":240},{"expr":"node_vmstat_nr_isolated_file{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"Isolated_file - Isolated file memory pages in each zone for each NUMA node","refId":"E","step":240}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Page Isolated","tooltip":{"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Pages","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{"Apps":"#629E51","Buffers":"#614D93","Cache":"#6D1F62","Cached":"#511749","Committed":"#508642","Free":"#0A437C","Harware Corrupted - Amount of RAM that the kernel identified as corrupted / not 
working":"#CFFAFF","Inactive":"#584477","PageTables":"#0A50A1","Page_Tables":"#0A50A1","RAM_Free":"#E0F9D7","Slab":"#806EB7","Slab_Cache":"#E0752D","Swap":"#BF1B00","Swap_Cache":"#C15C17","Swap_Free":"#2F575E","Unused":"#EAB839"},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"fill":2,"gridPos":{"h":10,"w":12,"x":12,"y":335},"id":216,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":350,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":4,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"node_vmstat_nr_anon_pages{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"Anon_pages - Anonymous mapped pagecache pages in each zone for each NUMA node","refId":"E","step":240},{"expr":"node_vmstat_nr_anon_transparent_hugepages{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"Anon_transparent_hugepages - Anonymous transparent huge pages in each zone for each NUMA node","refId":"F","step":240}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Page Anon","tooltip":{"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Pages","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{"Apps":"#629E51","Buffers":"#614D93","Cache":"#6D1F62","Cached":"#511749","Committed":"#508642","Free":"#0A437C","Harware Corrupted - Amount of RAM that the kernel identified as corrupted / not 
working":"#CFFAFF","Inactive":"#584477","PageTables":"#0A50A1","Page_Tables":"#0A50A1","RAM_Free":"#E0F9D7","Slab":"#806EB7","Slab_Cache":"#E0752D","Swap":"#BF1B00","Swap_Cache":"#C15C17","Swap_Free":"#2F575E","Unused":"#EAB839"},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":2,"fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":345},"id":204,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":350,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":4,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"node_vmstat_nr_free_cma{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"Free_cma - Free Contiguous Memory Allocator pages in each zone for each NUMA node","refId":"G","step":240},{"expr":"node_vmstat_nr_vmscan_write{instance=~\"$node:$port\",job=~\"$job\"}","intervalFactor":2,"legendFormat":"Vmscan_write - Pages written by VM scanner from LRU","refId":"B","step":240},{"expr":"node_vmstat_nr_vmscan_immediate_reclaim{instance=~\"$node:$port\",job=~\"$job\"}","intervalFactor":2,"legendFormat":"Immediate_reclaim - Prioritise for reclaim when writeback ends in each zone for each NUMA node","refId":"C","step":240}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Page Misc","tooltip":{"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Pages","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"title":"Memory Detail Vmstat 
Counters","type":"row"},{"collapsed":true,"gridPos":{"h":1,"w":24,"x":0,"y":29},"id":269,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":346},"id":8,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"show":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":4,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_context_switches_total{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Context switches","refId":"A","step":240},{"expr":"irate(node_intr_total{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Interrupts","refId":"B","step":240}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Context Switches / 
Interrupts","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Counter","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":2,"gridPos":{"h":10,"w":12,"x":12,"y":346},"id":7,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"show":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":4,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"node_load1{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":4,"legendFormat":"Load 1m","refId":"A","step":480},{"expr":"node_load5{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":4,"legendFormat":"Load 5m","refId":"B","step":480},{"expr":"node_load15{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":4,"legendFormat":"Load 15m","refId":"C","step":480}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"System 
Load","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Load","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":356},"id":259,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"show":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"/.*Critical*./","color":"#E24D42","fill":0},{"alias":"/.*Max*./","color":"#EF843C","fill":0}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_interrupts_total{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","interval":"","intervalFactor":2,"legendFormat":"{{ type }} - {{ info }}","refId":"A","step":240}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Interrupts 
Detail","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Counter","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":2,"gridPos":{"h":10,"w":12,"x":12,"y":356},"id":64,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"show":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"/.*Max*./","color":"#890F02","fill":0}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"process_max_fds{instance=~\"$node:$port\",job=~\"$job\"}","interval":"","intervalFactor":2,"legendFormat":"Maximum open file descriptors","refId":"A","step":240},{"expr":"process_open_fds{instance=~\"$node:$port\",job=~\"$job\"}","interval":"","intervalFactor":2,"legendFormat":"Open file descriptors","refId":"B","step":240}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"File 
Descriptors","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Descriptors","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":366},"id":151,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"show":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":4,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"node_entropy_available_bits{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"Entropy available to random number 
generators","refId":"C","step":240}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Entropy","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Entropy","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":2,"gridPos":{"h":10,"w":12,"x":12,"y":366},"id":62,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"show":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":4,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"node_procs_blocked{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"Processes blocked waiting for I/O to complete","refId":"A","step":240},{"expr":"node_procs_running{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"Processes in runnable state","refId":"B","step":240}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Processes 
State","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Processes","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":376},"id":148,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"show":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":4,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"rate(node_forks_total{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"Processes forks second","refId":"C","step":240}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Processes Forks","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Forks / 
sec","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":2,"gridPos":{"h":10,"w":12,"x":12,"y":376},"id":149,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"show":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"process_virtual_memory_bytes{instance=~\"$node:$port\",job=~\"$job\"}","interval":"","intervalFactor":2,"legendFormat":"Processes virtual memory size in bytes","refId":"C","step":240},{"expr":"process_resident_memory_bytes{instance=~\"$node:$port\",job=~\"$job\"}","interval":"","intervalFactor":2,"legendFormat":"Processes resident memory size in bytes","refId":"A","step":240}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Processes 
Memory","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"decbytes","label":"Bytes","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","description":"","fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":386},"id":168,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"show":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"/.*Variation*./","color":"#890F02"}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"node_timex_sync_status{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","interval":"","intervalFactor":2,"legendFormat":"Is clock synchronized to a reliable server (1 = yes, 0 = no)","refId":"B","step":240},{"expr":"node_timex_frequency_adjustment_ratio{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","interval":"","intervalFactor":2,"legendFormat":"Local clock frequency adjustment","refId":"A","step":240}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Time Syncronized 
Status","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Counter","logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","description":"","fill":2,"gridPos":{"h":10,"w":12,"x":12,"y":386},"id":260,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"show":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"/.*Variation*./","color":"#890F02"}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"node_timex_estimated_error_seconds{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","hide":false,"interval":"","intervalFactor":2,"legendFormat":"Estimated error in seconds","refId":"B","step":240},{"expr":"node_timex_offset_seconds{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","hide":false,"interval":"","intervalFactor":2,"legendFormat":"Time offset in between local system and reference clock","refId":"A","step":240},{"expr":"node_timex_maxerror_seconds{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","hide":false,"interval":"","intervalFactor":2,"legendFormat":"Maximum error in seconds","refId":"C","step":240}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Time Syncronized 
Drift","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"s","label":"Seconds","logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":396},"id":158,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"show":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"/.*Critical*./","color":"#E24D42","fill":0},{"alias":"/.*Max*./","color":"#EF843C","fill":0}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"node_hwmon_temp_celsius{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","interval":"","intervalFactor":2,"legendFormat":"{{ chip }} {{ sensor }} temp","refId":"A","step":240},{"expr":"node_hwmon_temp_crit_alarm_celsius{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","hide":true,"interval":"","intervalFactor":2,"legendFormat":"{{ chip }} {{ sensor }} Critical Alarm","refId":"B","step":240},{"expr":"node_hwmon_temp_crit_celsius{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","interval":"","intervalFactor":2,"legendFormat":"{{ chip }} {{ sensor }} Critical","refId":"C","step":240},{"expr":"node_hwmon_temp_crit_hyst_celsius{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","hide":true,"interval":"","intervalFactor":2,"legendFormat":"{{ chip }} {{ sensor }} Critical Historical","refId":"D","step":240},{"expr":"node_hwmon_temp_max_celsius{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","hide":true,"interval":"","intervalFactor":2,"legendFormat":"{{ chip }} {{ sensor }} 
Max","refId":"E","step":240}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Hardware temperature monitor","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"celsius","label":"Temperature","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"title":"System Detail","type":"row"},{"collapsed":true,"gridPos":{"h":1,"w":24,"x":0,"y":30},"id":270,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","description":"","fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":397},"id":9,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideZero":true,"max":true,"min":true,"rightSide":false,"show":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":4,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[{"alias":"/.*Read.*/","transform":"negative-Y"},{"alias":"/.*sda_.*/","color":"#7EB26D"},{"alias":"/.*sdb_.*/","color":"#EAB839"},{"alias":"/.*sdc_.*/","color":"#6ED0E0"},{"alias":"/.*sdd_.*/","color":"#EF843C"},{"alias":"/.*sde_.*/","color":"#E24D42"},{"alias":"/.*sda1.*/","color":"#584477"},{"alias":"/.*sda2_.*/","color":"#BA43A9"},{"alias":"/.*sda3_.*/","color":"#F4D598"},{"alias":"/.*sdb1.*/","color":"#0A50A1"},{"alias":"/.*sdb2.*/","color":"#BF1B00"},{"alias":"/.*sdb3.*/","color":"#E0752D"},{"alias":"/.*sdc1.*/","color":"#962D82"},{"alias":"/.*sdc2.*/","color":"#614D93"},{"alias":"/.*sdc3.*/","color":"#9AC48A"},{"alias":"/.*sdd1.*/","color":"#65C5DB"},{"alias":"/.*sdd2.*/","color":"#F9934E"},{"alias":"/.*sdd3.*/","color":"#EA6460"},{"alias":"/.*sde1.*/","color":"#E0F9D7"},{"alias":"/.*sdd2.*/","color":"#FCEACA"},{"alias":"/.*sde3.*/","color":"#F9E2D2"}],"spaceLength":10,"stack":false,"steppedLine":false,"targets"
:[{"expr":"irate(node_disk_reads_completed_total{instance=~\"$node:$port\",job=~\"$job\"}[5m])","intervalFactor":4,"legendFormat":"{{device}} - Reads completed","refId":"A","step":8},{"expr":"irate(node_disk_writes_completed_total{instance=~\"$node:$port\",job=~\"$job\"}[5m])","intervalFactor":2,"legendFormat":"{{device}} - Writes completed","refId":"B","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Disk IOps Completed","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"iops","label":"IO read (-) / write (+)","logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","description":"","fill":2,"gridPos":{"h":10,"w":12,"x":12,"y":397},"id":33,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideZero":true,"max":true,"min":true,"rightSide":false,"show":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":4,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"/.*Read.*/","transform":"negative-Y"},{"alias":"/.*sda_.*/","color":"#7EB26D"},{"alias":"/.*sdb_.*/","color":"#EAB839"},{"alias":"/.*sdc_.*/","color":"#6ED0E0"},{"alias":"/.*sdd_.*/","color":"#EF843C"},{"alias":"/.*sde_.*/","color":"#E24D42"},{"alias":"/.*sda1.*/","color":"#584477"},{"alias":"/.*sda2_.*/","color":"#BA43A9"},{"alias":"/.*sda3_.*/","color":"#F4D598"},{"alias":"/.*sdb1.*/","color":"#0A50A1"},{"alias":"/.*sdb2.*/","color":"#BF1B00"},{"alias":"/.*sdb3.*/","color":"#E0752D"},{"alias":"/.*sdc1.*/","color":"#962D82"},{"alias":"/.*sdc2.*/","color":"#614D93"},{"alias":"/.*sdc3.*/","color":"#9AC48A"},{"alias":"/.*sdd1.*/","color":"#65C5DB"},{"alias":"/.*sdd2.*/","color":"#F9934E"},{"alias":"/.*sdd3.*/","col
or":"#EA6460"},{"alias":"/.*sde1.*/","color":"#E0F9D7"},{"alias":"/.*sdd2.*/","color":"#FCEACA"},{"alias":"/.*sde3.*/","color":"#F9E2D2"}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_disk_read_bytes_total{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":4,"legendFormat":"{{device}} - Read bytes","refId":"A","step":8},{"expr":"irate(node_disk_written_bytes_total{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"{{device}} - Written bytes","refId":"B","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Disk R/W Data","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":"Bytes read (-) / write (+)","logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","description":"","fill":3,"gridPos":{"h":10,"w":12,"x":0,"y":407},"id":37,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideZero":true,"max":true,"min":true,"rightSide":false,"show":true,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":4,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"/.*Read.*/","transform":"negative-Y"},{"alias":"/.*sda_.*/","color":"#7EB26D"},{"alias":"/.*sdb_.*/","color":"#EAB839"},{"alias":"/.*sdc_.*/","color":"#6ED0E0"},{"alias":"/.*sdd_.*/","color":"#EF843C"},{"alias":"/.*sde_.*/","color":"#E24D42"},{"alias":"/.*sda1.*/","color":"#584477"},{"alias":"/.*sda2_.*/","color":"#BA43A9"},{"alias":"/.*sda3_.*/","color":"#F4D598"},{"alias":"/.*sdb1.*/","color":"#0A50A1"},{"alias":"/.*sdb2.*/","color":"#BF1B00"},{"alias":"/.*sd
b3.*/","color":"#E0752D"},{"alias":"/.*sdc1.*/","color":"#962D82"},{"alias":"/.*sdc2.*/","color":"#614D93"},{"alias":"/.*sdc3.*/","color":"#9AC48A"},{"alias":"/.*sdd1.*/","color":"#65C5DB"},{"alias":"/.*sdd2.*/","color":"#F9934E"},{"alias":"/.*sdd3.*/","color":"#EA6460"},{"alias":"/.*sde1.*/","color":"#E0F9D7"},{"alias":"/.*sdd2.*/","color":"#FCEACA"},{"alias":"/.*sde3.*/","color":"#F9E2D2"}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_disk_read_time_seconds_total{instance=~\"$node:$port\",job=~\"$job\"}[5m])","hide":false,"intervalFactor":4,"legendFormat":"{{device}} - Read time ms","refId":"A","step":8},{"expr":"irate(node_disk_write_time_seconds_total{instance=~\"$node:$port\",job=~\"$job\"}[5m])","hide":false,"intervalFactor":2,"legendFormat":"{{device}} - Write time ms","refId":"B","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Disk R/W Time","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"ms","label":"Millisec. 
read (-) / write (+)","logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","description":"","fill":2,"gridPos":{"h":10,"w":12,"x":12,"y":407},"id":35,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideZero":true,"max":true,"min":true,"rightSide":false,"show":true,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":4,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"/.*sda_.*/","color":"#7EB26D"},{"alias":"/.*sdb_.*/","color":"#EAB839"},{"alias":"/.*sdc_.*/","color":"#6ED0E0"},{"alias":"/.*sdd_.*/","color":"#EF843C"},{"alias":"/.*sde_.*/","color":"#E24D42"},{"alias":"/.*sda1.*/","color":"#584477"},{"alias":"/.*sda2_.*/","color":"#BA43A9"},{"alias":"/.*sda3_.*/","color":"#F4D598"},{"alias":"/.*sdb1.*/","color":"#0A50A1"},{"alias":"/.*sdb2.*/","color":"#BF1B00"},{"alias":"/.*sdb3.*/","color":"#E0752D"},{"alias":"/.*sdc1.*/","color":"#962D82"},{"alias":"/.*sdc2.*/","color":"#614D93"},{"alias":"/.*sdc3.*/","color":"#9AC48A"},{"alias":"/.*sdd1.*/","color":"#65C5DB"},{"alias":"/.*sdd2.*/","color":"#F9934E"},{"alias":"/.*sdd3.*/","color":"#EA6460"},{"alias":"/.*sde1.*/","color":"#E0F9D7"},{"alias":"/.*sdd2.*/","color":"#FCEACA"},{"alias":"/.*sde3.*/","color":"#F9E2D2"}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_disk_io_time_weighted_seconds_total{instance=~\"$node:$port\",job=~\"$job\"}[5m])","intervalFactor":4,"legendFormat":"{{device}} - IO time weighted","refId":"A","step":8}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Disk IOs 
Weighted","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"ms","label":"Milliseconds","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","description":"","fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":417},"id":133,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideZero":true,"max":true,"min":true,"rightSide":false,"show":true,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":4,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"/.*Read.*/","transform":"negative-Y"},{"alias":"/.*sda_.*/","color":"#7EB26D"},{"alias":"/.*sdb_.*/","color":"#EAB839"},{"alias":"/.*sdc_.*/","color":"#6ED0E0"},{"alias":"/.*sdd_.*/","color":"#EF843C"},{"alias":"/.*sde_.*/","color":"#E24D42"},{"alias":"/.*sda1.*/","color":"#584477"},{"alias":"/.*sda2_.*/","color":"#BA43A9"},{"alias":"/.*sda3_.*/","color":"#F4D598"},{"alias":"/.*sdb1.*/","color":"#0A50A1"},{"alias":"/.*sdb2.*/","color":"#BF1B00"},{"alias":"/.*sdb3.*/","color":"#E0752D"},{"alias":"/.*sdc1.*/","color":"#962D82"},{"alias":"/.*sdc2.*/","color":"#614D93"},{"alias":"/.*sdc3.*/","color":"#9AC48A"},{"alias":"/.*sdd1.*/","color":"#65C5DB"},{"alias":"/.*sdd2.*/","color":"#F9934E"},{"alias":"/.*sdd3.*/","color":"#EA6460"},{"alias":"/.*sde1.*/","color":"#E0F9D7"},{"alias":"/.*sdd2.*/","color":"#FCEACA"},{"alias":"/.*sde3.*/","color":"#F9E2D2"}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_disk_reads_merged_total{instance=~\"$node:$port\",job=~\"$job\"}[5m])","intervalFactor":2,"legendFormat":"{{device}} - Read 
merged","refId":"C","step":4},{"expr":"irate(node_disk_writes_merged_total{instance=~\"$node:$port\",job=~\"$job\"}[5m])","intervalFactor":2,"legendFormat":"{{device}} - Write merged","refId":"D","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Disk R/W Merged","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"iops","label":"I/Os","logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","description":"","fill":3,"gridPos":{"h":10,"w":12,"x":12,"y":417},"id":36,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideZero":true,"max":true,"min":true,"rightSide":false,"show":true,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":4,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"/.*sda_.*/","color":"#7EB26D"},{"alias":"/.*sdb_.*/","color":"#EAB839"},{"alias":"/.*sdc_.*/","color":"#6ED0E0"},{"alias":"/.*sdd_.*/","color":"#EF843C"},{"alias":"/.*sde_.*/","color":"#E24D42"},{"alias":"/.*sda1.*/","color":"#584477"},{"alias":"/.*sda2_.*/","color":"#BA43A9"},{"alias":"/.*sda3_.*/","color":"#F4D598"},{"alias":"/.*sdb1.*/","color":"#0A50A1"},{"alias":"/.*sdb2.*/","color":"#BF1B00"},{"alias":"/.*sdb3.*/","color":"#E0752D"},{"alias":"/.*sdc1.*/","color":"#962D82"},{"alias":"/.*sdc2.*/","color":"#614D93"},{"alias":"/.*sdc3.*/","color":"#9AC48A"},{"alias":"/.*sdd1.*/","color":"#65C5DB"},{"alias":"/.*sdd2.*/","color":"#F9934E"},{"alias":"/.*sdd3.*/","color":"#EA6460"},{"alias":"/.*sde1.*/","color":"#E0F9D7"},{"alias":"/.*sdd2.*/","color":"#FCEACA"},{"alias":"/.*sde3.*/","color":"#F9E2D2"}],"spaceLength":10,"stack":false,"steppedLine":false,"targets
":[{"expr":"irate(node_disk_io_time_seconds_total{instance=~\"$node:$port\",job=~\"$job\"}[5m])","intervalFactor":4,"legendFormat":"{{device}} - IO time ms","refId":"A","step":8}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Milliseconds Spent Doing I/Os","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"ms","label":"Milliseconds","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","description":"","fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":427},"id":34,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideZero":true,"max":true,"min":true,"rightSide":false,"show":true,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":4,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"/.*sda_.*/","color":"#7EB26D"},{"alias":"/.*sdb_.*/","color":"#EAB839"},{"alias":"/.*sdc_.*/","color":"#6ED0E0"},{"alias":"/.*sdd_.*/","color":"#EF843C"},{"alias":"/.*sde_.*/","color":"#E24D42"},{"alias":"/.*sda1.*/","color":"#584477"},{"alias":"/.*sda2_.*/","color":"#BA43A9"},{"alias":"/.*sda3_.*/","color":"#F4D598"},{"alias":"/.*sdb1.*/","color":"#0A50A1"},{"alias":"/.*sdb2.*/","color":"#BF1B00"},{"alias":"/.*sdb3.*/","color":"#E0752D"},{"alias":"/.*sdc1.*/","color":"#962D82"},{"alias":"/.*sdc2.*/","color":"#614D93"},{"alias":"/.*sdc3.*/","color":"#9AC48A"},{"alias":"/.*sdd1.*/","color":"#65C5DB"},{"alias":"/.*sdd2.*/","color":"#F9934E"},{"alias":"/.*sdd3.*/","color":"#EA6460"},{"alias":"/.*sde1.*/","color":"#E0F9D7"},{"alias":"/.*sdd2.*/","color":"#FCEACA"},{"alias":"/.*sde3.*/","color":"#F9E2D2"}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr
":"irate(node_disk_io_now{instance=~\"$node:$port\",job=~\"$job\"}[5m])","intervalFactor":4,"legendFormat":"{{device}} - IO now","refId":"A","step":8}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Disk IOs Current in Progress","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"iops","label":"I/Os","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","description":"","fill":2,"gridPos":{"h":10,"w":12,"x":12,"y":427},"id":66,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideZero":false,"max":true,"min":true,"show":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":4,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"/.*sda_.*/","color":"#7EB26D"},{"alias":"/.*sdb_.*/","color":"#EAB839"},{"alias":"/.*sdc_.*/","color":"#6ED0E0"},{"alias":"/.*sdd_.*/","color":"#EF843C"},{"alias":"/.*sde_.*/","color":"#E24D42"},{"alias":"/.*sda1.*/","color":"#584477"},{"alias":"/.*sda2_.*/","color":"#B7DBAB"},{"alias":"/.*sda3_.*/","color":"#F4D598"},{"alias":"/.*sdb1.*/","color":"#0A50A1"},{"alias":"/.*sdb2.*/","color":"#BF1B00"},{"alias":"/.*sdb3.*/","color":"#E0752D"},{"alias":"/.*sdc1.*/","color":"#962D82"},{"alias":"/.*sdc2.*/","color":"#614D93"},{"alias":"/.*sdc3.*/","color":"#9AC48A"},{"alias":"/.*sdd1.*/","color":"#65C5DB"},{"alias":"/.*sdd2.*/","color":"#F9934E"},{"alias":"/.*sdd3.*/","color":"#EA6460"},{"alias":"/.*sde1.*/","color":"#E0F9D7"},{"alias":"/.*sdd2.*/","color":"#FCEACA"},{"alias":"/.*sde3.*/","color":"#F9E2D2"}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"node_textfile_scrape_error{instance=~\"$node:$port\",job=~\"$job\"}","intervalFac
tor":4,"legendFormat":"Textfile scrape error (1 = true)","refId":"A","step":8}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Open Error File","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Errors","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"title":"Disk Detail","type":"row"},{"collapsed":true,"gridPos":{"h":1,"w":24,"x":0,"y":31},"id":271,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":3,"description":"","fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":428},"id":43,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"rightSide":false,"show":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":4,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"node_filesystem_avail_bytes{instance=~\"$node:$port\",job=~\"$job\",device!~'rootfs'}","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"{{mountpoint}} - Available","metric":"","refId":"A","step":4},{"expr":"node_filesystem_free_bytes{instance=~\"$node:$port\",job=~\"$job\",device!~'rootfs'}","format":"time_series","hide":true,"intervalFactor":2,"legendFormat":"{{mountpoint}} - Free","refId":"B","step":2},{"expr":"node_filesystem_size_bytes{instance=~\"$node:$port\",job=~\"$job\",device!~'rootfs'}","format":"time_series","hide":true,"intervalFactor":2,"legendFormat":"{{mountpoint}} - Size","refId":"D","step":2}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Filesystem space 
available","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":"Bytes","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","description":"","fill":2,"gridPos":{"h":10,"w":12,"x":12,"y":428},"id":41,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideZero":true,"max":true,"min":true,"rightSide":false,"show":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"node_filesystem_files_free{instance=~\"$node:$port\",job=~\"$job\",device!~'rootfs'}","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"{{mountpoint}} - Free file nodes","refId":"B","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"File Nodes Free","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"File 
Nodes","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","description":"","fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":438},"id":28,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"show":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":4,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"node_filefd_maximum{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":4,"legendFormat":"Max open files","refId":"A","step":8},{"expr":"node_filefd_allocated{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"Open files","refId":"B","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"File 
Descriptor","tooltip":{"shared":false,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Files","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","description":"","fill":2,"gridPos":{"h":10,"w":12,"x":12,"y":438},"id":219,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideZero":true,"max":true,"min":true,"rightSide":false,"show":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"node_filesystem_files{instance=~\"$node:$port\",job=~\"$job\",device!~'rootfs'}","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"{{mountpoint}} - File nodes total","refId":"A","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"File Nodes Size","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"File Nodes","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{"/ 
ReadOnly":"#890F02"},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":null,"description":"","fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":448},"id":44,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideEmpty":true,"hideZero":true,"max":true,"min":true,"rightSide":false,"show":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":4,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":true,"steppedLine":false,"targets":[{"expr":"node_filesystem_readonly{instance=~\"$node:$port\",job=~\"$job\",device!~'rootfs'}","format":"time_series","intervalFactor":2,"legendFormat":"{{mountpoint}} - ReadOnly","refId":"C","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Filesystem in ReadOnly","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Read Only","logBase":1,"max":"1","min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"title":"Filesystem 
Detail","type":"row"},{"collapsed":true,"gridPos":{"h":1,"w":24,"x":0,"y":32},"id":272,"panels":[{"aliasColors":{"receive_packets_eth0":"#7EB26D","receive_packets_lo":"#E24D42","transmit_packets_eth0":"#7EB26D","transmit_packets_lo":"#E24D42"},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":449},"id":60,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":300,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"/.*Trans.*/","transform":"negative-Y"},{"alias":"/.*lo.*/","color":"#7EB26D"},{"alias":"/.*eth0.*/","color":"#EAB839"},{"alias":"/.*eth1.*/","color":"#6ED0E0"},{"alias":"/.*eth2.*/","color":"#EF843C"},{"alias":"/.*eth3.*/","color":"#E24D42"},{"alias":"/.*eth4.*/","color":"#1F78C1"},{"alias":"/.*eth5.*/","color":"#BA43A9"}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_network_receive_packets_total{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"{{device}} - Receive","refId":"O","step":4},{"expr":"irate(node_network_transmit_packets_total{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"{{device}} - Transmit","refId":"P","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Network Traffic by Packets","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":"Packets out (-) / in 
(+)","logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":2,"gridPos":{"h":10,"w":12,"x":12,"y":449},"id":142,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideEmpty":false,"hideZero":false,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":300,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"/.*Trans.*/","transform":"negative-Y"},{"alias":"/.*lo.*.errors.*/","color":"#7EB26D"},{"alias":"/.*eth0.*.errors.*/","color":"#EAB839"},{"alias":"/.*eth1.*.errors.*/","color":"#6ED0E0"},{"alias":"/.*eth2.*.errors.*/","color":"#EF843C"},{"alias":"/.*eth3.*.errors.*/","color":"#E24D42"},{"alias":"/.*eth4.*.errors.*/","color":"#1F78C1"},{"alias":"/.*eth5.*.errors.*/","color":"#BA43A9"}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_network_receive_errs_total{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"{{device}} - Receive errors","refId":"E","step":4},{"expr":"irate(node_network_transmit_errs_total{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"{{device}} - Rransmit errors","refId":"F","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Network Traffic Errors","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":"Packets out (-) / in 
(+)","logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":459},"id":143,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideEmpty":false,"hideZero":false,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":300,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"/.*Trans.*/","transform":"negative-Y"},{"alias":"/.*lo.*.drop.*/","color":"#7EB26D"},{"alias":"/.*eth0.*.drop.*/","color":"#EAB839"},{"alias":"/.*eth1.*.drop.*/","color":"#6ED0E0"},{"alias":"/.*eth2.*.drop.*/","color":"#EF843C"},{"alias":"/.*eth3.*.drop.*/","color":"#E24D42"},{"alias":"/.*eth4.*.drop.*/","color":"#1F78C1"},{"alias":"/.*eth5.*.drop.*/","color":"#BA43A9"}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_network_receive_drop_total{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"{{device}} - Receive drop","refId":"G","step":4},{"expr":"irate(node_network_transmit_drop_total{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"{{device}} - Transmit drop","refId":"H","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Network Traffic Drop","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":"Packets out (-) / in 
(+)","logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":2,"gridPos":{"h":10,"w":12,"x":12,"y":459},"id":141,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideEmpty":false,"hideZero":false,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":300,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"/.*Trans.*/","transform":"negative-Y"},{"alias":"/.*lo.*.compressed.*/","color":"#7EB26D"},{"alias":"/.*eth0.*.compressed.*/","color":"#EAB839"},{"alias":"/.*eth1.*.compressed.*/","color":"#6ED0E0"},{"alias":"/.*eth2.*.compressed.*/","color":"#EF843C"},{"alias":"/.*eth3.*.compressed.*/","color":"#E24D42"},{"alias":"/.*eth4.*.compressed.*/","color":"#1F78C1"},{"alias":"/.*eth5.*.compressed.*/","color":"#BA43A9"}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_network_receive_compressed_total{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"{{device}} - Receive compressed","refId":"C","step":4},{"expr":"irate(node_network_transmit_compressed_total{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"{{device}} - Transmit compressed","refId":"D","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Network Traffic Compressed","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":"Packets out (-) / in 
(+)","logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":469},"id":146,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideEmpty":false,"hideZero":false,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":300,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"/.*Trans.*/","transform":"negative-Y"},{"alias":"/.*lo.*.multicast.*/","color":"#7EB26D"},{"alias":"/.*eth0.*.multicast.*/","color":"#EAB839"},{"alias":"/.*eth1.*.multicast.*/","color":"#6ED0E0"},{"alias":"/.*eth2.*.multicast.*/","color":"#EF843C"},{"alias":"/.*eth3.*.multicast.*/","color":"#E24D42"},{"alias":"/.*eth4.*.multicast.*/","color":"#1F78C1"},{"alias":"/.*eth5.*.multicast.*/","color":"#BA43A9"}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_network_receive_multicast_total{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"{{device}} - Receive multicast","refId":"M","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Network Traffic Multicast","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":"Packets out (-) / in 
(+)","logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":2,"gridPos":{"h":10,"w":12,"x":12,"y":469},"id":144,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideEmpty":false,"hideZero":false,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":300,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"/.*Trans.*/","transform":"negative-Y"},{"alias":"/.*lo.*.fifo.*/","color":"#7EB26D"},{"alias":"/.*eth0.*.fifo.*/","color":"#EAB839"},{"alias":"/.*eth1.*.fifo.*/","color":"#6ED0E0"},{"alias":"/.*eth2.*.fifo.*/","color":"#EF843C"},{"alias":"/.*eth3.*.fifo.*/","color":"#E24D42"},{"alias":"/.*eth4.*.fifo.*/","color":"#1F78C1"},{"alias":"/.*eth5.*.fifo.*/","color":"#BA43A9"}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_network_receive_fifo_total{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"{{device}} - Receive fifo","refId":"I","step":4},{"expr":"irate(node_network_transmit_fifo_total{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"{{device}} - Transmit fifo","refId":"J","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Network Traffic Fifo","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":"Packets out (-) / in 
(+)","logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":479},"id":145,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideEmpty":false,"hideZero":false,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":300,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"/.*Trans.*/","transform":"negative-Y"},{"alias":"/.*lo.*.frame.*/","color":"#7EB26D"},{"alias":"/.*eth0.*.frame.*/","color":"#EAB839"},{"alias":"/.*eth1.*.frame.*/","color":"#6ED0E0"},{"alias":"/.*eth2.*.frame.*/","color":"#EF843C"},{"alias":"/.*eth3.*.frame.*/","color":"#E24D42"},{"alias":"/.*eth4.*.frame.*/","color":"#1F78C1"},{"alias":"/.*eth5.*.frame.*/","color":"#BA43A9"}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_network_receive_frame_total{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"{{device}} - Receive frame","refId":"K","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Network Traffic Frame","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"pps","label":"Packets out (-) / in 
(+)","logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":2,"gridPos":{"h":10,"w":12,"x":12,"y":479},"id":231,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideEmpty":false,"hideZero":false,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":300,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"/.*Trans.*/","transform":"negative-Y"},{"alias":"/.*lo.*.carrier.*/","color":"#7EB26D"},{"alias":"/.*eth0.*.carrier.*/","color":"#EAB839"},{"alias":"/.*eth1.*.carrier.*/","color":"#6ED0E0"},{"alias":"/.*eth2.*.carrier.*/","color":"#EF843C"}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_network_transmit_carrier_total{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"{{device}} - Statistic transmit_carrier","refId":"C","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Network Traffic 
Carrier","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Counter","logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":489},"id":232,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideEmpty":false,"hideZero":false,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":300,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"/.*Trans.*/","transform":"negative-Y"},{"alias":"/.*lo.*.carrier.*/","color":"#7EB26D"},{"alias":"/.*eth0.*.carrier.*/","color":"#EAB839"},{"alias":"/.*eth1.*.carrier.*/","color":"#6ED0E0"},{"alias":"/.*eth2.*.carrier.*/","color":"#EF843C"}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_network_transmit_colls_total{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"{{device}} - Transmit colls","refId":"C","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Network Traffic 
Colls","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Counter","logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":2,"gridPos":{"h":10,"w":12,"x":12,"y":489},"id":61,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"rightSide":false,"show":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"NF conntrack limit","color":"#890F02","fill":0}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"node_nf_conntrack_entries{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"NF conntrack entries","refId":"O","step":4},{"expr":"node_nf_conntrack_entries_limit{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"NF conntrack limit","refId":"P","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"NF 
Contrack","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Entries","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":499},"id":230,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"rightSide":false,"show":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"node_arp_entries{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"{{ device }} - ARP entries","refId":"O","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"ARP Entries","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Entries","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"title":"Network Traffic 
Detail","type":"row"},{"collapsed":true,"gridPos":{"h":1,"w":24,"x":0,"y":33},"id":273,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":500},"id":63,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideEmpty":false,"hideZero":false,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":300,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"node_sockstat_TCP_alloc{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"TCP_alloc - Allocated sockets","refId":"D","step":240},{"expr":"node_sockstat_TCP_inuse{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"TCP_inuse - Tcp sockets currently in use","refId":"E","step":240},{"expr":"node_sockstat_TCP_mem{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"TCP_mem - Used memory for tcp","refId":"F","step":240},{"expr":"node_sockstat_TCP_orphan{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"TCP_orphan - Orphan sockets","refId":"H","step":240},{"expr":"node_sockstat_TCP_tw{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"TCP_tw - Sockets wating close","refId":"I","step":240}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Sockstat 
TCP","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Sockets","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":2,"gridPos":{"h":10,"w":12,"x":12,"y":500},"id":124,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideEmpty":false,"hideZero":false,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":300,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"node_sockstat_UDPLITE_inuse{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"UDPLITE_inuse - Udplite sockets currently in use","refId":"J","step":240},{"expr":"node_sockstat_UDP_inuse{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"UDP_inuse - Udp sockets currently in use","refId":"K","step":240},{"expr":"node_sockstat_UDP_mem{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"UDP_mem - Used memory for udp","refId":"L","step":240}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Sockstat 
UDP","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Sockets","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":510},"id":126,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideEmpty":false,"hideZero":false,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":300,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"node_sockstat_sockets_used{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"Sockets_used - Sockets currently in use","refId":"N","step":240}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Sockstat 
Used","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Sockets","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":2,"gridPos":{"h":10,"w":12,"x":12,"y":510},"id":220,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideEmpty":false,"hideZero":false,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":300,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"node_sockstat_TCP_mem_bytes{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"TCP_mem_bytes - ","refId":"G","step":240},{"expr":"node_sockstat_UDP_mem_bytes{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"UDP_mem_bytes - ","refId":"A","step":240}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Sockstat Memory 
Size","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":"Bytes","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":520},"id":125,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideEmpty":false,"hideZero":false,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":300,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"node_sockstat_FRAG_inuse{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"FRAG_inuse - Frag sockets currently in use","refId":"A","step":240},{"expr":"node_sockstat_FRAG_memory{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"FRAG_memory - Used memory for frag","refId":"B","step":240},{"expr":"node_sockstat_RAW_inuse{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","intervalFactor":2,"legendFormat":"RAW_inuse - Raw sockets currently in use","refId":"C","step":240}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Sockstat FRAG / RAW","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Sockets","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"title":"Network 
Sockstat","type":"row"},{"collapsed":true,"gridPos":{"h":1,"w":24,"x":0,"y":34},"id":274,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":521},"height":"","id":49,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideEmpty":false,"hideZero":false,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":300,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":null,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"/.*Out.*/","transform":"negative-Y"},{"alias":"/.*Discards.*/","color":"#7EB26D"},{"alias":"/.*NoRoutes.*/","color":"#EAB839"}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_netstat_Ip_InReceives{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"InReceives - IP inreceives","refId":"A","step":4},{"expr":"irate(node_netstat_Ip_DefaultTTL{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","hide":true,"intervalFactor":2,"legendFormat":"DefaultTTL - Default TTL","refId":"B","step":10},{"expr":"irate(node_netstat_Ip_InDelivers{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"InDelivers - IP indelivers","refId":"I","step":4},{"expr":"irate(node_netstat_Ip_OutRequests{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"OutRequests - IP outrequests","refId":"P","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Netstat IP In / Out","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Datagrams out (-) / in 
(+)","logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":2,"gridPos":{"h":10,"w":12,"x":12,"y":521},"height":"","id":221,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideEmpty":false,"hideZero":false,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":300,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":2,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"/.*Out.*/","transform":"negative-Y"},{"alias":"/.*Octets.*/","color":"#7EB26D"},{"alias":"/.*McastPkts.*/","color":"#EAB839"},{"alias":"/.*McastOctets.*/","color":"#6ED0E0"},{"alias":"/.*BcastPkts.*/","color":"#EF843C"},{"alias":"/.*BcastOctets.*/","color":"#E24D42"}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_netstat_IpExt_InOctets{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"InOctets - Received octets","refId":"K","step":4},{"expr":"irate(node_netstat_IpExt_OutOctets{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"OutOctets - Sent octets","refId":"Q","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Netstat IP In / Out Octets","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Octects out (-) / in 
(+)","logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":531},"height":"","id":119,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideEmpty":false,"hideZero":false,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":300,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":2,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"/.*Out.*/","transform":"negative-Y"},{"alias":"/.*Octets.*/","color":"#7EB26D"},{"alias":"/.*McastPkts.*/","color":"#EAB839"},{"alias":"/.*McastOctets.*/","color":"#6ED0E0"},{"alias":"/.*BcastPkts.*/","color":"#EF843C"},{"alias":"/.*BcastOctets.*/","color":"#E24D42"}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_netstat_IpExt_InBcastPkts{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"InBcastPkts - Received IP broadcast datagrams","refId":"B","step":4},{"expr":"irate(node_netstat_IpExt_OutBcastPkts{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"OutBcastPkts - Sent IP broadcast datagrams","refId":"N","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Netstat IP Bcast","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Datagrams out (-) / in 
(+)","logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":2,"gridPos":{"h":10,"w":12,"x":12,"y":531},"height":"","id":222,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideEmpty":false,"hideZero":false,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":300,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":2,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"/.*Out.*/","transform":"negative-Y"},{"alias":"/.*Octets.*/","color":"#7EB26D"},{"alias":"/.*McastPkts.*/","color":"#EAB839"},{"alias":"/.*McastOctets.*/","color":"#6ED0E0"},{"alias":"/.*BcastPkts.*/","color":"#EF843C"},{"alias":"/.*BcastOctets.*/","color":"#E24D42"}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_netstat_IpExt_InBcastOctets{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"InBcastOctets - Received IP broadcast octets","refId":"A","step":4},{"expr":"irate(node_netstat_IpExt_OutBcastOctets{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"OutBcastOctets - Sent IP broadcast octects","refId":"M","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Netstat IP Bcast Octets","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Octets out (-) / in 
(+)","logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":541},"height":"","id":120,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideEmpty":false,"hideZero":false,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":300,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":2,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"/.*Out.*/","transform":"negative-Y"},{"alias":"/.*Octets.*/","color":"#7EB26D"},{"alias":"/.*McastPkts.*/","color":"#EAB839"},{"alias":"/.*McastOctets.*/","color":"#6ED0E0"},{"alias":"/.*BcastPkts.*/","color":"#EF843C"},{"alias":"/.*BcastOctets.*/","color":"#E24D42"}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_netstat_IpExt_InMcastPkts{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"InMcastPkts - Received IP multicast datagrams","refId":"H","step":4},{"expr":"irate(node_netstat_IpExt_OutMcastPkts{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"OutMcastPkts - Sent IP multicast datagrams","refId":"P","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Netstat IP Mcast","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Datagrams out (-) / in 
(+)","logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":2,"gridPos":{"h":10,"w":12,"x":12,"y":541},"height":"","id":223,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideEmpty":false,"hideZero":false,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":300,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":2,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"/.*Out.*/","transform":"negative-Y"},{"alias":"/.*Octets.*/","color":"#7EB26D"},{"alias":"/.*McastPkts.*/","color":"#EAB839"},{"alias":"/.*McastOctets.*/","color":"#6ED0E0"},{"alias":"/.*BcastPkts.*/","color":"#EF843C"},{"alias":"/.*BcastOctets.*/","color":"#E24D42"}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_netstat_IpExt_InMcastOctets{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"InMcastOctets - Received IP multicast octets","refId":"G","step":4},{"expr":"irate(node_netstat_IpExt_OutMcastOctets{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"OutMcastOctets - Sent IP multicast octets","refId":"O","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Netstat IP Mcast Octets","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Octets out (-) / in 
(+)","logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":551},"height":"","id":81,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideEmpty":false,"hideZero":false,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":300,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":null,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_netstat_Ip_ForwDatagrams{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"ForwDatagrams - IP outforwdatagrams","refId":"C","step":4},{"expr":"irate(node_netstat_Ip_Forwarding{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"Forwarding - IP forwarding","refId":"D","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Netstat IP 
Forwarding","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Datagrams","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":2,"gridPos":{"h":10,"w":12,"x":12,"y":551},"height":"","id":122,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideEmpty":false,"hideZero":false,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":300,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":null,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_netstat_Ip_FragCreates{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"FragCreates - IP fragmentation creations","refId":"E","step":4},{"expr":"irate(node_netstat_Ip_FragFails{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"FragFails - IP fragmentation failures","refId":"F","step":4},{"expr":"irate(node_netstat_Ip_FragOKs{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"FragOKs - IP fragmentation oks","refId":"G","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Netstat IP 
Fragmented","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Datagrams","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":561},"height":"","id":51,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideEmpty":false,"hideZero":false,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":300,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":2,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_netstat_IpExt_InCEPkts{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"InCEPkts - Congestion Experimented datagrams in","refId":"C","step":4},{"expr":"irate(node_netstat_IpExt_InECT0Pkts{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"InECT0Pkts - Datagrams received with ECT(0)","refId":"E","step":4},{"expr":"irate(node_netstat_IpExt_InECT1Pkts{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"InECT1Pkt - Datarams received with ECT(1)","refId":"F","step":4},{"expr":"irate(node_netstat_IpExt_InNoECTPkts{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"InNoECTPkts - Datagrams received with NOECT","refId":"A","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Netstat IP ECT / 
CEP","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Datagrams","logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":2,"gridPos":{"h":10,"w":12,"x":12,"y":561},"height":"","id":123,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideEmpty":false,"hideZero":false,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":300,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":null,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_netstat_Ip_ReasmFails{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"ReasmFails - IP reassembly failures","refId":"Q","step":4},{"expr":"irate(node_netstat_Ip_ReasmOKs{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"ReasmOKs - IP reassembly oks","refId":"R","step":4},{"expr":"irate(node_netstat_Ip_ReasmReqds{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"ReasmReqds - IP reassembly requireds","refId":"S","step":4},{"expr":"irate(node_netstat_Ip_ReasmTimeout{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"ReasmTimeout - IP reasmtimeout","refId":"T","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Netstat IP 
Reasambled","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Datagrams","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":571},"height":"","id":118,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideEmpty":false,"hideZero":false,"max":true,"min":true,"rightSide":false,"show":true,"sideWidth":300,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":null,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"/.*Out.*/","transform":"negative-Y"},{"alias":"/.*Discards.*/","color":"#7EB26D"},{"alias":"/.*NoRoutes.*/","color":"#EAB839"}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_netstat_Ip_InDiscards{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"InDiscards - IP indiscards","refId":"J","step":4},{"expr":"irate(node_netstat_Ip_InHdrErrors{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"InHdrErrors - IP inhdrerrors","refId":"K","step":4},{"expr":"irate(node_netstat_Ip_InUnknownProtos{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"InUnknownProtos - IP inunknownprotos","refId":"M","step":4},{"expr":"irate(node_netstat_Ip_OutDiscards{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"OutDiscards - IP 
outdiscards","refId":"N","step":4},{"expr":"irate(node_netstat_Ip_OutNoRoutes{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"OutNoRoutes - IP outnoroutes","refId":"O","step":4},{"expr":"irate(node_netstat_IpExt_InNoRoutes{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"InNoRoutes - IP datagrams discarded due to no routes in forwarding path","refId":"C","step":4},{"expr":"irate(node_netstat_IpExt_InCsumErrors{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"InCsumErrors - IP datagrams with checksum errors","refId":"A","step":4},{"expr":"irate(node_netstat_IpExt_InTruncatedPkts{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"InTruncatedPkts - IP datagrams discarded due to frame not carrying enough data","refId":"B","step":4},{"expr":"irate(node_netstat_Ip_InAddrErrors{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"InAddrErrors - IP inaddrerrors","refId":"D","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Netstat IP Errors / Discards","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Datagrams out (-) / in (+)","logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"title":"Network 
Netstat","type":"row"},{"collapsed":true,"gridPos":{"h":1,"w":24,"x":0,"y":35},"id":275,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":572},"height":"","id":52,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideZero":true,"max":true,"min":true,"rightSide":false,"show":true,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":2,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"/.*Out.*/","transform":"negative-Y"},{"alias":"/.*InSegs.*/","color":"#CCA300"},{"alias":"/.*OutSegs.*/","color":"#CCA300"}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_netstat_Tcp_InCsumErrors{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"InCsumErrors - Segments received with checksum errors","refId":"E","step":4},{"expr":"irate(node_netstat_Tcp_InErrs{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"InErrs - Segments received in error (e.g., bad TCP checksums)","refId":"F","step":4},{"expr":"irate(node_netstat_Tcp_InSegs{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"InSegs - Segments received, including those received in error. 
This count includes segments received on currently established connections","refId":"G","step":4},{"expr":"irate(node_netstat_Tcp_OutRsts{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"OutRsts - Segments sent containing the RST flag","refId":"I","step":4},{"expr":"irate(node_netstat_Tcp_OutSegs{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"OutSegs - Segments sent, including those on current connections but excluding those containing only retransmitted octets","refId":"J","step":4},{"expr":"irate(node_netstat_Tcp_RetransSegs{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"RetransSegs - Segments retransmitted - that is, the number of TCP segments transmitted containing one or more previously transmitted octets","refId":"A","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"TCP Segments","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Segments out (-) / in (+)","logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":2,"gridPos":{"h":10,"w":12,"x":12,"y":572},"height":"","id":85,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideZero":false,"max":true,"min":true,"rightSide":false,"show":true,"sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":2,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"/.*MaxConn 
*./","color":"#890F02","fill":0}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"node_netstat_Tcp_CurrEstab{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"CurrEstab - TCP connections for which the current state is either ESTABLISHED or CLOSE- WAIT","refId":"C","step":4},{"expr":"node_netstat_Tcp_MaxConn{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"MaxConn - Limit on the total number of TCP connections the entity can support (Dinamic is \"-1\")","refId":"H","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"TCP Connections","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Connections","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":582},"height":"","id":86,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideZero":false,"max":true,"min":true,"rightSide":false,"show":true,"sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":2,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"node_netstat_Tcp_RtoAlgorithm{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","hide":true,"intervalFactor":2,"legendFormat":"RtoAlgorithm - Algorithm used to determine the timeout value used for retransmitting unacknowledged 
octets","refId":"M","step":4},{"expr":"node_netstat_Tcp_RtoMax{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"RtoMax - Maximum value permitted by a TCP implementation for the retransmission timeout, measured in milliseconds","refId":"N","step":4},{"expr":"node_netstat_Tcp_RtoMin{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"RtoMin - Minimum value permitted by a TCP implementation for the retransmission timeout, measured in milliseconds","refId":"O","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"TCP Retransmission","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"ms","label":"Milliseconds","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":2,"gridPos":{"h":10,"w":12,"x":12,"y":582},"height":"","id":82,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideZero":false,"max":true,"min":true,"rightSide":false,"show":true,"sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":2,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_netstat_Tcp_ActiveOpens{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"ActiveOpens - TCP connections that have made a direct transition to the SYN-SENT state from the CLOSED 
state","refId":"A","step":4},{"expr":"irate(node_netstat_Tcp_AttemptFails{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"AttemptFails - TCP connections that have made a direct transition to the CLOSED state from either the SYN-SENT and SYN-RCVD","refId":"B","step":4},{"expr":"irate(node_netstat_Tcp_EstabResets{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"EstabResets - TCP connections that have made a direct transition to the CLOSED state from either the ESTABLISHED state or the CLOSE-WAIT state","refId":"D","step":4},{"expr":"irate(node_netstat_Tcp_PassiveOpens{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"PassiveOpens - TCP connections that have made a direct transition to the SYN-RCVD state from the LISTEN state","refId":"K","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"TCP Direct Transition","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Connections","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"title":"Network Netstat 
TCP","type":"row"},{"collapsed":true,"gridPos":{"h":1,"w":24,"x":0,"y":36},"id":276,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","description":"","fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":583},"height":"","id":94,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideEmpty":false,"hideZero":false,"max":true,"min":true,"rightSide":false,"show":true,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":2,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_netstat_TcpExt_TCPAbortOnClose{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"TCPAbortOnClose - Connections aborted due to early user close","refId":"V","step":4},{"expr":"irate(node_netstat_TcpExt_TCPAbortOnData{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"TCPAbortOnData - Connections aborted due to unexpected data","refId":"W","step":4},{"expr":"irate(node_netstat_TcpExt_TCPAbortOnLinger{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"TCPAbortOnLinger - Connections aborted in lingered state after being closed","refId":"X","step":4},{"expr":"irate(node_netstat_TcpExt_TCPAbortOnMemory{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"TCPAbortOnMemory - Connections aborted before attached to a socket","refId":"Y","step":4},{"expr":"irate(node_netstat_TcpExt_TCPAbortOnTimeout{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"TCPAbortOnTimeout - Connections aborted due 
timeout","refId":"B","step":4},{"expr":"irate(node_netstat_TcpExt_TCPAbortFailed{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"TCPAbortFailed - Connections aborted without send RST due insuffient memory","refId":"A","step":4},{"expr":"irate(node_netstat_TcpExt_TCPTimeouts{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"TCPTimeouts - Other TCP connections timeouts","refId":"C","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"TCP Aborts / Tiemouts","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Connections","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","description":"","fill":2,"gridPos":{"h":10,"w":12,"x":12,"y":583},"height":"","id":92,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideEmpty":false,"hideZero":false,"max":true,"min":true,"rightSide":false,"show":true,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":2,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_netstat_TcpExt_DelayedACKLocked{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"DelayedACKLocked - Delayed acks further delayed because of locked socket","refId":"C","step":4},{"expr":"irate(node_netstat_TcpExt_DelayedACKLost{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"DelayedACKLost - Times quick ack mode was 
activated","refId":"D","step":4},{"expr":"irate(node_netstat_TcpExt_DelayedACKs{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"DelayedACKs - Delayed acks sent","refId":"E","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"TCP Delayed ACK","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Counter","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","description":"","fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":593},"height":"","id":91,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideEmpty":false,"hideZero":false,"max":true,"min":true,"rightSide":false,"show":true,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":2,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"/.*Sent.*/","transform":"negative-Y"},{"alias":"SyncookiesSent - SYN cookies sent","color":"#E0F9D7"},{"alias":"SyncookiesRecv - SYN cookies received","color":"#E0F9D7"}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_netstat_TcpExt_SyncookiesFailed{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"SyncookiesFailed - Invalid SYN cookies received","refId":"R","step":4},{"expr":"irate(node_netstat_TcpExt_SyncookiesRecv{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"SyncookiesRecv - SYN cookies 
received","refId":"S","step":4},{"expr":"irate(node_netstat_TcpExt_SyncookiesSent{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"SyncookiesSent - SYN cookies sent","refId":"T","step":4},{"expr":"irate(node_netstat_TcpExt_TCPSYNChallenge{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"SynChallenge - Challenge ACKs sent in response to SYN packets","refId":"A","step":4},{"expr":"irate(node_netstat_TcpExt_TCPChallengeACK{instance=~\"$node:$port\",job=~\"$job\"}[5m])","intervalFactor":2,"legendFormat":"TCPChallengeACK - Challenge ACKs sent (RFC 5961 3.2)","refId":"B","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"TCP SynCookie / Challenge","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Counter out (-) / in (+)","logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":null,"description":"","fill":2,"gridPos":{"h":10,"w":12,"x":12,"y":593},"height":"","id":90,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideZero":false,"max":true,"min":true,"rightSide":false,"show":true,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":2,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_netstat_TcpExt_TCPLossFailures{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"TCPLossFailures - Timeouts in loss 
state","refId":"C","step":4},{"expr":"irate(node_netstat_TcpExt_TCPLossProbeRecovery{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"TCPLossProbeRecovery - TCP loss probe recoveries","refId":"D","step":4},{"expr":"irate(node_netstat_TcpExt_TCPLossProbes{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"TCPLossProbes - Sent TCP loss probes","refId":"A","step":4},{"expr":"irate(node_netstat_TcpExt_TCPLossUndo{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"TCPLossUndo - Congestion windows recovered without slow start after partial ack","refId":"B","step":4},{"expr":"irate(node_netstat_TcpExt_TCPLostRetransmit{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"TCPLostRetransmit - Retransmits lost","refId":"E","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"TCP 
LOSS","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Counter","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","description":"","fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":603},"height":"","id":53,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideEmpty":false,"hideZero":false,"max":true,"min":true,"rightSide":false,"show":true,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":2,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_netstat_TcpExt_ListenDrops{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"ListenDrops - SYNs to LISTEN sockets ignored","refId":"H","step":4},{"expr":"irate(node_netstat_TcpExt_LockDroppedIcmps{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"LockDroppedIcmps - ICMP packets dropped because socket was locked","refId":"J","step":4},{"expr":"irate(node_netstat_TcpExt_TCPDeferAcceptDrop{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"TCPDeferAcceptDrop - Dropped ACK frames received by a socket in SYN_RECV state","refId":"D","step":4},{"expr":"irate(node_netstat_TcpExt_TCPBacklogDrop{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"TCPBacklogDrop - Packets dropped because the socket's receive queue was 
full","refId":"P","step":4},{"expr":"irate(node_netstat_TcpExt_OutOfWindowIcmps{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"OutOfWindowIcmps - ICMP packets dropped because they were out-of-window","refId":"A","step":4},{"expr":"irate(node_netstat_TcpExt_TCPMinTTLDrop{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"TCPMinTTLDrop - TCP packets dropped under minTTL condition","refId":"B","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"TCP DROPS","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Counter","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","description":"","fill":2,"gridPos":{"h":10,"w":12,"x":12,"y":603},"height":"","id":101,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideZero":false,"max":true,"min":true,"rightSide":false,"show":true,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":2,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_netstat_TcpExt_TCPForwardRetrans{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"TCPForwardRetrans - Lost packets retransmitted with Forward RTO-Recovery","refId":"O","step":4},{"expr":"irate(node_netstat_TcpExt_TCPSlowStartRetrans{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"TCPSlowStartRetrans - Lost packets retransmitted after 
a slow start","refId":"P","step":4},{"expr":"irate(node_netstat_TcpExt_TCPSynRetrans{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"TCPSynRetrans - SYN-SYN/ACK retransmits to break down retransmissions in SYN, fast/timeout retransmits","refId":"Q","step":4},{"expr":"irate(node_netstat_TcpExt_TCPSpuriousRTOs{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"TCPSpuriousRTOs - FRTO's successfully detected spurious RTOs","refId":"A","step":4},{"expr":"irate(node_netstat_TcpExt_TCPSpuriousRtxHostQueues{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"TCPSpuriousRtxHostQueues - Times detected that the fast clone is not yet freed in tcp_transmit_skb()","refId":"B","step":4},{"expr":"irate(node_netstat_TcpExt_TCPFullUndo{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"TCPFullUndo - Retransmits that undid the CWND reduction","refId":"C","step":4},{"expr":"irate(node_netstat_TcpExt_TCPRetransFail{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"TCPRetransFail - Failed tcp_retransmit_skb() calls","refId":"D","step":4},{"expr":"irate(node_netstat_TcpExt_TCPPartialUndo{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"TCPPartialUndo - Congestion windows partially recovered using Hoe heuristic","refId":"E","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"TCP 
Retrans","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Counter","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","description":"","fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":613},"height":"","id":87,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideEmpty":false,"hideZero":false,"max":true,"min":true,"rightSide":false,"show":true,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":2,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_netstat_TcpExt_PruneCalled{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"PruneCalled - Pkts pruned from recv queue because of soc buf overrun","refId":"P","step":4},{"expr":"irate(node_netstat_TcpExt_RcvPruned{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"RcvPruned - Packets pruned from receive queue","refId":"Q","step":4},{"expr":"irate(node_netstat_TcpExt_OfoPruned{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"OfoPruned - Packets dropped from out-of-order queue because of socket buffer overrun","refId":"A","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"TCP 
Pruned","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Counter","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":null,"description":"","fill":2,"gridPos":{"h":10,"w":12,"x":12,"y":613},"height":"","id":96,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideZero":false,"max":true,"min":true,"rightSide":false,"show":true,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":2,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_netstat_TcpExt_TCPDirectCopyFromBacklog{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"TCPDirectCopyFromBacklog - Packets directly received from backlog","refId":"M","step":4},{"expr":"irate(node_netstat_TcpExt_TCPDirectCopyFromPrequeue{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"TCPDirectCopyFromPrequeue - Packets directly received from prequeue","refId":"N","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"TCP Direct 
Copy","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Counter","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":623},"height":"","id":100,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideZero":false,"max":true,"min":true,"rightSide":false,"show":true,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":2,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_netstat_TcpExt_TW{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"TW - TCP sockets finished time wait in fast timer","refId":"O","step":4},{"expr":"irate(node_netstat_TcpExt_TWKilled{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"TWKilled - TCP sockets finished time wait in slow timer","refId":"P","step":4},{"expr":"irate(node_netstat_TcpExt_TWRecycled{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"TWRecycled - Time wait sockets recycled by time stamp","refId":"Q","step":4},{"expr":"irate(node_netstat_TcpExt_TCPTimeWaitOverflow{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"TCPTimeWaitOverflow - Occurrences of time wait bucket overflow","refId":"A","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"TCP 
TimeWait","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Counter","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","description":"","fill":2,"gridPos":{"h":10,"w":12,"x":12,"y":623},"height":"","id":93,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideEmpty":false,"hideZero":false,"max":true,"min":true,"rightSide":false,"show":true,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":2,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_netstat_TcpExt_PAWSActive{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"PAWSActive - Active connections rejected because of time stamp","refId":"M","step":4},{"expr":"irate(node_netstat_TcpExt_PAWSEstab{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"PAWSEstab - Pkts rejects in estab connections because of timestamp","refId":"N","step":4},{"expr":"irate(node_netstat_TcpExt_PAWSPassive{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"PAWSPassive - Passive connections rejected because of time stamp","refId":"O","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"TCP 
PAWS","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Counter","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":null,"description":"","fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":633},"height":"","id":98,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideZero":false,"max":true,"min":true,"rightSide":false,"show":true,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":2,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_netstat_TcpExt_TCPSackRecovery{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"TCPSackRecovery - Times recovered from packet loss by selective acknowledgements","refId":"E","step":4},{"expr":"irate(node_netstat_TcpExt_TCPSackRecoveryFail{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"TCPSackRecoveryFail - Issue while recovering packets lost using selective ACK","refId":"F","step":4},{"expr":"irate(node_netstat_TcpExt_TCPSackShiftFallback{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"TCPSackShiftFallback - SACKs fallbacks","refId":"G","step":4},{"expr":"irate(node_netstat_TcpExt_TCPSackShifted{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"TCPSackShifted - SACKs 
shifted","refId":"H","step":4},{"expr":"irate(node_netstat_TcpExt_TCPSACKDiscard{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"TCPSackDiscard - Discarded due invalid SACK block.","refId":"A","step":4},{"expr":"irate(node_netstat_TcpExt_TCPSackFailures{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"TCPSackFailures - Timeouts after SACK recovery","refId":"B","step":4},{"expr":"irate(node_netstat_TcpExt_TCPSackMerged{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"TCPSackMerged - SACKs merged","refId":"C","step":4},{"expr":"irate(node_netstat_TcpExt_TCPSACKReneging{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"TCPSACKReneging - Bad SACK blocks received","refId":"D","step":4},{"expr":"irate(node_netstat_TcpExt_TCPSACKReorder{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"TCPSACKReorder - Times detected reordering using SACK","refId":"I","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"TCP 
SACK","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Counter","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":null,"description":"","fill":2,"gridPos":{"h":10,"w":12,"x":12,"y":633},"height":"","id":95,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideZero":false,"max":true,"min":true,"rightSide":false,"show":true,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":2,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_netstat_TcpExt_TCPDSACKIgnoredOld{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"TCPDSACKIgnoredOld - Discarded packets with duplicate SACK while retransmitting","refId":"F","step":4},{"expr":"irate(node_netstat_TcpExt_TCPDSACKOfoRecv{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"TCPDSACKOfoRecv - DSACKs for out of order packets received","refId":"G","step":4},{"expr":"irate(node_netstat_TcpExt_TCPDSACKOfoSent{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"TCPDSACKOfoSent - DSACKs sent for out of order packets","refId":"H","step":4},{"expr":"irate(node_netstat_TcpExt_TCPDSACKOldSent{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"TCPDSACKOldSent - DSACKs sent for old 
packets","refId":"I","step":4},{"expr":"irate(node_netstat_TcpExt_TCPDSACKRecv{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"TCPDSACKRecv - DSACKs received","refId":"J","step":4},{"expr":"irate(node_netstat_TcpExt_TCPDSACKUndo{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"TCPDSACKUndo - Discarded packets with erroneous retransmit","refId":"K","step":4},{"expr":"irate(node_netstat_TcpExt_TCPDSACKIgnoredNoUndo{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"TCPDSACKIgnoredNoUndo - Discarded packets with duplicate SACK","refId":"A","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"TCP DSACK","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Counter","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":null,"description":"","fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":643},"height":"","id":97,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideZero":false,"max":true,"min":true,"rightSide":false,"show":true,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":2,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_netstat_TcpExt_TCPFastOpenActive{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"TCPFastOpenActive - Successful outbound TFO 
connections","refId":"P","step":4},{"expr":"irate(node_netstat_TcpExt_TCPFastOpenActiveFail{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"TCPFastOpenActiveFail - SYN-ACK packets received that did not acknowledge data sent in the SYN packet and caused a retransmissions without SYN data","refId":"Q","step":4},{"expr":"irate(node_netstat_TcpExt_TCPFastOpenCookieReqd{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"TCPFastOpenCookieReqd - Inbound SYN packets requesting TFO with TFO set but no cookie","refId":"R","step":4},{"expr":"irate(node_netstat_TcpExt_TCPFastOpenListenOverflow{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"TCPFastOpenListenOverflow - Times the fastopen listen queue overflowed","refId":"S","step":4},{"expr":"irate(node_netstat_TcpExt_TCPFastOpenPassive{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"TCPFastOpenPassive - Successful inbound TFO connections","refId":"T","step":4},{"expr":"irate(node_netstat_TcpExt_TCPFastOpenPassiveFail{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"TCPFastOpenPassiveFail - Inbound SYN packets with TFO cookie that was invalid","refId":"U","step":4},{"expr":"irate(node_netstat_TcpExt_TCPFastRetrans{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"TCPFastRetrans - Packets lost fast-retransmitted","refId":"V","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"TCP FastOpen / 
FastRetrans","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Counter","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":null,"description":"","fill":2,"gridPos":{"h":10,"w":12,"x":12,"y":643},"height":"","id":99,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideZero":false,"max":true,"min":true,"rightSide":false,"show":true,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":2,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_netstat_TcpExt_TCPHPAcks{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"TCPHPAcks - Acknowledgments not containing data received","refId":"Z","step":4},{"expr":"irate(node_netstat_TcpExt_TCPHPHits{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"TCPHPHits - Packets header predicted","refId":"A","step":4},{"expr":"irate(node_netstat_TcpExt_TCPHPHitsToUser{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"TCPHPHitsToUser - Packets header predicted and directly queued to user","refId":"B","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"TCP 
HP","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Counter","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","description":"","fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":653},"height":"","id":102,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideEmpty":false,"hideZero":false,"max":true,"min":true,"rightSide":false,"show":true,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":2,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_netstat_TcpExt_TCPToZeroWindowAdv{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"TCPToZeroWindowAdv - Times window went from non-zero to zero","refId":"V","step":4},{"expr":"irate(node_netstat_TcpExt_TCPWantZeroWindowAdv{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"TCPWantZeroWindowAdv - Times zero window announced","refId":"W","step":4},{"expr":"irate(node_netstat_TcpExt_TCPFromZeroWindowAdv{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"TCPFromZeroWindowAdv - Times window went from zero to non-zero","refId":"A","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"TCP 
ZeroWindow","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Counter","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","description":"","fill":2,"gridPos":{"h":10,"w":12,"x":12,"y":653},"height":"","id":103,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideEmpty":false,"hideZero":false,"max":true,"min":true,"rightSide":false,"show":true,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":2,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_netstat_TcpExt_TCPFACKReorder{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"TCPFACKReorder - Detected packets with re-ordering using FACK","refId":"E","step":4},{"expr":"irate(node_netstat_TcpExt_TCPTSReorder{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"TCPTSReorder - Times detected packets with re-ordering using timestamp option","refId":"S","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"TCP 
Reorder","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Counter","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","description":"","fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":663},"height":"","id":162,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideEmpty":false,"hideZero":false,"max":true,"min":true,"rightSide":false,"show":true,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":2,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_netstat_TcpExt_TCPRenoFailures{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"TCPRenoFailures - Timeouts after reno fast retransmit","refId":"O","step":4},{"expr":"irate(node_netstat_TcpExt_TCPRenoRecovery{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"TCPRenoRecovery - Times recovered from packet loss due to fast retransmit","refId":"A","step":4},{"expr":"irate(node_netstat_TcpExt_TCPRenoRecoveryFail{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"TCPRenoRecoveryFail - Times reno fast retransmits failed","refId":"B","step":4},{"expr":"irate(node_netstat_TcpExt_TCPRenoReorder{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"TCPRenoReorder - Times detected reordering using reno fast retransmit","refId":"C","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"TCP 
Reno","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Counter","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","description":"","fill":2,"gridPos":{"h":10,"w":12,"x":12,"y":663},"height":"","id":163,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideEmpty":false,"hideZero":false,"max":true,"min":true,"rightSide":false,"show":true,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":2,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_netstat_TcpExt_TCPReqQFullDoCookies{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"TCPReqQFullDoCookies - Times SYNCOOKIE was replied to client","refId":"O","step":4},{"expr":"irate(node_netstat_TcpExt_TCPReqQFullDrop{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"TCPReqQFullDrop - Times SYN request was dropped due to disabled syncookies","refId":"A","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"TCP 
ReqQ","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Counter","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","description":"","fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":673},"height":"","id":164,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideEmpty":false,"hideZero":false,"max":true,"min":true,"rightSide":false,"show":true,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":2,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_netstat_TcpExt_TCPOFODrop{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"TCPOFODrop - Packets meant to be queued in OFO but dropped because socket rcvbuf limit reached","refId":"P","step":4},{"expr":"irate(node_netstat_TcpExt_TCPOFOMerge{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"TCPOFOMerge - Packets in OFO that were merged with other packets","refId":"Q","step":4},{"expr":"irate(node_netstat_TcpExt_TCPOFOQueue{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"TCPOFOQueue - Packets queued in OFO queue","refId":"B","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"TCP Out of 
order","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Counter","logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":null,"description":"","fill":2,"gridPos":{"h":10,"w":12,"x":12,"y":673},"height":"","id":165,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideZero":false,"max":true,"min":true,"rightSide":false,"show":true,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":2,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_netstat_TcpExt_TCPMD5NotFound{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"TCPMD5NotFound - Times MD5 hash expected but not found","refId":"Z","step":4},{"expr":"irate(node_netstat_TcpExt_TCPMD5Unexpected{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"TCPMD5Unexpected - Times MD5 hash unexpected but found","refId":"A","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"TCP 
MD5","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Counter","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":null,"description":"","fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":683},"height":"","id":166,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideZero":false,"max":true,"min":true,"rightSide":false,"show":true,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":2,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_netstat_TcpExt_TCPPrequeued{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"TCPPrequeued - Packets directly queued to recvmsg prequeue","refId":"Z","step":4},{"expr":"irate(node_netstat_TcpExt_TCPPrequeueDropped{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"TCPPrequeueDropped - Packets dropped from prequeue","refId":"A","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"TCP 
Prequeued","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Counter","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":null,"description":"","fill":2,"gridPos":{"h":10,"w":12,"x":12,"y":683},"height":"","id":167,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideZero":false,"max":true,"min":true,"rightSide":false,"show":true,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":2,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_netstat_TcpExt_TCPRcvCoalesce{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"TCPRcvCoalesce - Times tried to coalesce the receive queue","refId":"Z","step":4},{"expr":"irate(node_netstat_TcpExt_TCPRcvCollapsed{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"TCPRcvCollapsed - Packets collapsed in receive queue due to low socket buffer","refId":"A","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"TCP 
Rcv","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Counter","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","description":"","fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":693},"height":"","id":224,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideEmpty":false,"hideZero":false,"max":true,"min":true,"rightSide":false,"show":true,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":2,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_netstat_TcpExt_TCPOrigDataSent{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"TCPOrigDataSent - Outgoing packets with original data","refId":"C","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"TCP Original 
Data","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Counter","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","description":"","fill":2,"gridPos":{"h":10,"w":12,"x":12,"y":693},"height":"","id":225,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideEmpty":false,"hideZero":false,"max":true,"min":true,"rightSide":false,"show":true,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":2,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_netstat_TcpExt_ArpFilter{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"ArpFilter - Arp packets filtered","refId":"A","step":4},{"expr":"irate(node_netstat_TcpExt_IPReversePathFilter{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"IPReversePathFilter - Packets arrive from non directly connected network","refId":"G","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"TCP 
Filters","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Counter","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","description":"","fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":703},"height":"","id":226,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideEmpty":false,"hideZero":false,"max":true,"min":true,"rightSide":false,"show":true,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":2,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_netstat_TcpExt_TCPPureAcks{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"TCPPureAcks - Acknowledgments not containing data payload received","refId":"C","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"TCP Pure 
ACK","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Counter","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","description":"","fill":2,"gridPos":{"h":10,"w":12,"x":12,"y":703},"height":"","id":227,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideEmpty":false,"hideZero":false,"max":true,"min":true,"rightSide":false,"show":true,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":2,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_netstat_TcpExt_TCPAutoCorking{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"TCPAutoCorking - Times stack detected skb was underused and its flush was deferred","refId":"A","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"TCP Auto 
Corking","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Counter","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","description":"","fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":713},"height":"","id":104,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideEmpty":false,"hideZero":false,"max":true,"min":true,"rightSide":false,"show":true,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":2,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_netstat_TcpExt_BusyPollRxPackets{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"BusyPollRxPackets - Low latency application-fetched packets","refId":"B","step":4},{"expr":"irate(node_netstat_TcpExt_EmbryonicRsts{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"EmbryonicRsts - Resets received for embryonic SYN_RECV sockets","refId":"F","step":4},{"expr":"irate(node_netstat_TcpExt_ListenOverflows{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"ListenOverflows - Times the listen queue of a socket overflowed","refId":"I","step":4},{"expr":"irate(node_netstat_TcpExt_TCPSchedulerFailed{instance=~\"$node:$port\",job=~\"$job\"}[5m])","intervalFactor":2,"legendFormat":"TCPSchedulerFailed - Times receiver scheduled too late for direct 
processing","refId":"A","step":4},{"expr":"irate(node_netstat_TcpExt_TCPMemoryPressures{instance=~\"$node:$port\",job=~\"$job\"}[5m])","intervalFactor":2,"legendFormat":"TCPMemoryPressures - TCP ran low on memory","refId":"C","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"TCP Issues","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Counter","logBase":1,"max":null,"min":"0","show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"title":"Network Netstat TCP Linux MIPs","type":"row"},{"collapsed":true,"gridPos":{"h":1,"w":24,"x":0,"y":37},"id":277,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":null,"fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":714},"height":"","id":55,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideZero":false,"max":true,"min":true,"rightSide":false,"show":true,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":2,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"/.*Out.*/","transform":"negative-Y"},{"alias":"/.*Snd.*/","transform":"negative-Y"},{"alias":"/.*Datagrams.*/","color":"#EAB839"}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_netstat_Udp_InDatagrams{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"InDatagrams - Datagrams received","refId":"B","step":4},{"expr":"irate(node_netstat_Udp_OutDatagrams{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"OutDatagrams - Datagrams sent","refId":"E","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"UDP In / 
Out","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Datagrams out (-) / in (+)","logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":2,"gridPos":{"h":10,"w":12,"x":12,"y":714},"height":"","id":109,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideZero":false,"max":true,"min":true,"rightSide":false,"show":true,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":2,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"/.*Out.*/","transform":"negative-Y"},{"alias":"/.*Snd.*/","transform":"negative-Y"},{"alias":"/.*bufErrors.*/","color":"#70DBED"}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_netstat_Udp_InCsumErrors{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"InCsumErrors - Datagrams with checksum errors","refId":"A","step":4},{"expr":"irate(node_netstat_Udp_InErrors{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"InErrors - Datagrams that could not be delivered to an application","refId":"C","step":4},{"expr":"irate(node_netstat_Udp_RcvbufErrors{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"RcvbufErrors - Datagrams for which not enough socket buffer memory to receive","refId":"F","step":4},{"expr":"irate(node_netstat_Udp_SndbufErrors{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"SndbufErrors - Datagrams for which not enough socket buffer memory to 
transmit","refId":"G","step":4},{"expr":"irate(node_netstat_Udp_NoPorts{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"NoPorts - Datagrams received on a port with no listener","refId":"B","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"UDP Errors","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Datagrams out (-) / in (+)","logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":724},"height":"","id":57,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideZero":false,"max":true,"min":true,"rightSide":false,"show":true,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":2,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"/.*Out.*/","transform":"negative-Y"},{"alias":"/.*Datagrams.*/","color":"#EAB839"}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_netstat_UdpLite_InDatagrams{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"InDatagrams - Packets received","refId":"B","step":4},{"expr":"irate(node_netstat_UdpLite_OutDatagrams{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"OutDatagrams - Packets sent","refId":"E","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"UDP Lite In / 
Out","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Datagrams out (-) / in (+)","logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":2,"gridPos":{"h":10,"w":12,"x":12,"y":724},"height":"","id":110,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideZero":false,"max":true,"min":true,"rightSide":false,"show":true,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":2,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"/.*Out.*/","transform":"negative-Y"},{"alias":"/.*bufErrors.*/","color":"#6ED0E0"}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_netstat_UdpLite_InCsumErrors{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"InCsumErrors - Datagrams with checksum errors","refId":"A","step":4},{"expr":"irate(node_netstat_UdpLite_InErrors{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"InErrors - Datagrams that could not be delivered to an application","refId":"C","step":4},{"expr":"irate(node_netstat_UdpLite_RcvbufErrors{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"RcvbufErrors - Datagrams for which not enough socket buffer memory to receive","refId":"F","step":4},{"expr":"irate(node_netstat_UdpLite_SndbufErrors{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"SndbufErrors - Datagrams for which not enough socket buffer memory to 
transmit","refId":"G","step":4},{"expr":"irate(node_netstat_UdpLite_NoPorts{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"NoPorts - Datagrams received on a port with no listener","refId":"B","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"UDP Lite Errors","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Datagrams out (-) / in (+)","logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"title":"Network Netstat UDP","type":"row"},{"collapsed":true,"gridPos":{"h":1,"w":24,"x":0,"y":38},"id":278,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":null,"fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":725},"height":"","id":50,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideZero":false,"max":true,"min":true,"rightSide":false,"show":true,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":2,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"/.*Out.*/","transform":"negative-Y"},{"alias":"/.*Type3.*/","color":"#EAB839"},{"alias":"/.*SrcQuenchs.*/","color":"#705DA0"},{"alias":"/.*ParmProb.*/","color":"#70DBED"},{"alias":"/.*TimeExcds.*/","color":"#EA6460"},{"alias":"/.*DestUnreachs.*/","color":"#7EB26D"},{"alias":"/.*InErrors.*/","color":"#890F02"},{"alias":"/.*OutErrors.*/","color":"#890F02"}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_netstat_Icmp_InErrors{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"InErrors - Messages which the entity received but determined as having 
ICMP-specific errors (bad ICMP checksums, bad length, etc.)","refId":"I","step":4},{"expr":"irate(node_netstat_Icmp_OutErrors{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"OutErrors - Messages which this entity did not send due to problems discovered within ICMP, such as a lack of buffers","refId":"V","step":4},{"expr":"irate(node_netstat_Icmp_InDestUnreachs{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"InDestUnreachs - Destination Unreachable messages received","refId":"A","step":4},{"expr":"irate(node_netstat_Icmp_OutDestUnreachs{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"OutDestUnreachs - Destination Unreachable messages sent","refId":"B","step":4},{"expr":"irate(node_netstat_IcmpMsg_InType3{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"InType3 - Destination unreachable","refId":"K","step":4},{"expr":"irate(node_netstat_IcmpMsg_OutType3{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"OutType3 - Destination unreachable","refId":"L","step":4},{"expr":"irate(node_netstat_IcmpMsg_InType11{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"InType11 - Time Exceeded","refId":"C","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"ICMP Errors 1","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Messages out (-) / in 
(+)","logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":null,"fill":2,"gridPos":{"h":10,"w":12,"x":12,"y":725},"height":"","id":147,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideZero":false,"max":true,"min":true,"rightSide":false,"show":true,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":2,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"/.*Out.*/","transform":"negative-Y"},{"alias":"/.*Type3.*/","color":"#EAB839"},{"alias":"/.*SrcQuenchs.*/","color":"#705DA0"},{"alias":"/.*ParmProb.*/","color":"#70DBED"},{"alias":"/.*TimeExcds.*/","color":"#EA6460"},{"alias":"/.*DestUnreachs.*/","color":"#7EB26D"},{"alias":"/.*InErrors.*/","color":"#890F02"},{"alias":"/.*OutErrors.*/","color":"#890F02"}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_netstat_Icmp_InCsumErrors{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"InCsumErrors - Messages with ICMP checksum errors","refId":"E","step":4},{"expr":"irate(node_netstat_Icmp_InTimeExcds{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"InTimeExcds - Time Exceeded messages received","refId":"C","step":4},{"expr":"irate(node_netstat_Icmp_OutTimeExcds{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"OutTimeExcds - Time Exceeded messages sent","refId":"D","step":4},{"expr":"irate(node_netstat_Icmp_InParmProbs{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"InParmProbs - Parameter Problem messages 
received","refId":"F","step":4},{"expr":"irate(node_netstat_Icmp_OutParmProbs{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"OutParmProb - Parameter Problem messages sent","refId":"G","step":4},{"expr":"irate(node_netstat_Icmp_InSrcQuenchs{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"InSrcQuenchs - Source Quench messages received","refId":"H","step":4},{"expr":"irate(node_netstat_Icmp_OutSrcQuenchs{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"OutSrcQuenchs - Source Quench messages sent","refId":"J","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"ICMP Errors 2","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Messages out (-) / in 
(+)","logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":null,"fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":735},"height":"","id":115,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideZero":false,"max":true,"min":true,"rightSide":false,"show":true,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":2,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"/.*Out.*/","transform":"negative-Y"},{"alias":"/.*Msgs.*/","color":"#6ED0E0"},{"alias":"/.*Redirects.*/","color":"#F9BA8F"}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_netstat_Icmp_InMsgs{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"InMsgs - Messages which the entity received. Note that this counter includes all those counted by icmpInErrors","refId":"J","step":4},{"expr":"irate(node_netstat_Icmp_InRedirects{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"InRedirects - Redirect messages received","refId":"L","step":4},{"expr":"irate(node_netstat_Icmp_OutMsgs{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"OutMsgs - Messages which this entity attempted to send. Note that this counter includes all those counted by icmpOutErrors","refId":"W","step":4},{"expr":"irate(node_netstat_Icmp_OutRedirects{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"OutRedirects - Redirect messages sent. 
For a host, this object will always be zero, since hosts do not send redirects","refId":"Y","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"ICMP In / Out - Messages / Redirects","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Messages out (-) / in (+)","logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":null,"fill":2,"gridPos":{"h":10,"w":12,"x":12,"y":735},"height":"","id":112,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideZero":false,"max":true,"min":true,"rightSide":false,"show":true,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":2,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"/.*Out.*/","transform":"negative-Y"},{"alias":"/.*TimestampReps.*/","color":"#F9934E"},{"alias":"/.*Timestamps -.*/","color":"#6ED0E0"}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_netstat_Icmp_InTimestampReps{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"InTimestampReps - Timestamp Reply messages received","refId":"O","step":4},{"expr":"irate(node_netstat_Icmp_InTimestamps{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"InTimestamps - Timestamp (request) messages received","refId":"P","step":4},{"expr":"irate(node_netstat_Icmp_OutTimestampReps{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"OutTimestampReps - Timestamp Reply messages 
sent","refId":"A","step":4},{"expr":"irate(node_netstat_Icmp_OutTimestamps{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"OutTimestamps - Timestamp (request) messages sent","refId":"B","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"ICMP Timestamps","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Messages out (-) / in (+)","logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":null,"fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":745},"height":"","id":114,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideZero":false,"max":true,"min":true,"rightSide":false,"show":true,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":2,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"/.*Out.*/","transform":"negative-Y"},{"alias":"/.*EchoReps.*/","color":"#D683CE"},{"alias":"/.*Echos -.*/","color":"#F9934E"}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_netstat_Icmp_InEchoReps{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"InEchoReps - Echo Reply messages received","refId":"G","step":4},{"expr":"irate(node_netstat_Icmp_InEchos{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"InEchos - Echo (request) messages received","refId":"H","step":4},{"expr":"irate(node_netstat_Icmp_OutEchoReps{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"OutEchoReps - Echo Reply 
messages sent","refId":"T","step":4},{"expr":"irate(node_netstat_Icmp_OutEchos{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"OutEchos - Echo (request) messages sent","refId":"U","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"ICMP Echos","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Messages out (-) / in (+)","logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":null,"fill":2,"gridPos":{"h":10,"w":12,"x":12,"y":745},"height":"","id":113,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideZero":false,"max":true,"min":true,"rightSide":false,"show":true,"sort":"current","sortDesc":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"minSpan":2,"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[{"alias":"/.*Out.*/","transform":"negative-Y"},{"alias":"/.*AddrMaskReps.*/","color":"#B7DBAB"},{"alias":"/.*Masks -.*/","color":"#E5AC0E"}],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"irate(node_netstat_Icmp_InAddrMaskReps{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"InAddrMaskReps - Address Mask Reply messages received","refId":"C","step":4},{"expr":"irate(node_netstat_Icmp_InAddrMasks{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"InAddrMasks - Address Mask Request messages 
received","refId":"D","step":4},{"expr":"irate(node_netstat_Icmp_OutAddrMaskReps{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"OutAddrMaskReps - Address Mask Reply messages sent","refId":"Q","step":4},{"expr":"irate(node_netstat_Icmp_OutAddrMasks{instance=~\"$node:$port\",job=~\"$job\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"OutAddrMasks - Address Mask Request messages sent","refId":"R","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"ICMP Masks","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Messages out (-) / in (+)","logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"title":"Network Netstat ICMP","type":"row"},{"collapsed":true,"gridPos":{"h":1,"w":24,"x":0,"y":39},"id":279,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","description":"","fill":2,"gridPos":{"h":10,"w":12,"x":0,"y":746},"id":40,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"show":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":true,"steppedLine":false,"targets":[{"expr":"node_scrape_collector_duration_seconds{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"{{collector}} - Scrape duration","refId":"A","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Node Exporter Scrape 
Time","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"s","label":"Seconds","logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","description":"","fill":2,"gridPos":{"h":10,"w":12,"x":12,"y":746},"id":157,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":true,"min":true,"show":true,"total":false,"values":true},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"node_scrape_collector_success{instance=~\"$node:$port\",job=~\"$job\"}","format":"time_series","hide":false,"intervalFactor":2,"legendFormat":"{{collector}} - Scrape success","refId":"A","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Node Exporter Scrape Success","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":"Counter","logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"title":"Node Exporter","type":"row"}],"refresh":false,"schemaVersion":16,"style":"dark","tags":[],"templating":{"list":[{"allValue":null,"current":{},"datasource":"${DS_PROMETHEUS}","hide":0,"includeAll":false,"label":"Job","multi":false,"name":"job","options":[],"query":"label_values(node_boot_time_seconds, 
job)","refresh":1,"regex":"","skipUrlSync":false,"sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{},"datasource":"${DS_PROMETHEUS}","hide":0,"includeAll":false,"label":"Host:","multi":false,"name":"node","options":[],"query":"label_values(node_boot_time_seconds{job=~\"$job\"}, instance)","refresh":1,"regex":"/([^:]+):.*/","skipUrlSync":false,"sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{},"datasource":"${DS_PROMETHEUS}","hide":0,"includeAll":false,"label":"Port","multi":false,"name":"port","options":[],"query":"label_values(node_boot_time_seconds{instance=~\"$node:(.*)\"}, instance)","refresh":1,"regex":"/[^:]+:(.*)/","skipUrlSync":false,"sort":3,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-24h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone":"browser","title":"Nodes","uid":"FnxG5TYiz","version":1},"inputs":[{"name":"DS_PROMETHEUS","pluginId":"prometheus","type":"datasource","value":"Rancher-Monitoring"}],"overwrite":false} \ No newline at end of file diff --git a/charts/rancher-monitoring/v0.0.1/charts/grafana/dashboards/c_rancher-components-dashboard.json b/charts/rancher-monitoring/v0.0.1/charts/grafana/dashboards/c_rancher-components-dashboard.json new file mode 100644 index 0000000000000000000000000000000000000000..73b245b45c658cd34f2746b8dde610c6a10f4048 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/grafana/dashboards/c_rancher-components-dashboard.json @@ -0,0 +1 @@ 
+{"dashboard":{"__inputs":[{"name":"DS_PROMETHEUS","label":"Rancher-Monitoring","description":"","type":"datasource","pluginId":"prometheus","pluginName":"Prometheus"}],"__requires":[{"type":"grafana","id":"grafana","name":"Grafana","version":"5.3.0"},{"type":"panel","id":"graph","name":"Graph","version":"5.0.0"},{"type":"datasource","id":"prometheus","name":"Prometheus","version":"5.0.0"},{"type":"panel","id":"singlestat","name":"Singlestat","version":"5.0.0"}],"annotations":{"list":[{"builtIn":1,"datasource":"-- Grafana --","enable":true,"hide":true,"iconColor":"rgba(0, 211, 255, 1)","name":"Annotations & Alerts","type":"dashboard"}]},"description":"A quick dashboard for displaying Fluentd metrics.","editable":true,"gnetId":3522,"graphTooltip":0,"id":null,"links":[],"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":1,"gridPos":{"h":7,"w":24,"x":0,"y":0},"id":1,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"fluentd_buffer_queue_length","format":"time_series","intervalFactor":2,"metric":"fluentd_buffer_queue_length","refId":"A","step":2}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Fluentd buffer queue 
length","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":1,"gridPos":{"h":7,"w":24,"x":0,"y":7},"id":3,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"fluentd_buffer_total_queued_size","format":"time_series","intervalFactor":2,"metric":"fluentd_buffer_total_queued_size","refId":"A","step":2}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Fluentd buffer total queued size","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"cacheTimeout":null,"colorBackground":false,"colorValue":true,"colors":["rgba(245, 54, 54, 0.9)","rgba(45, 170, 3, 0.89)","rgba(50, 172, 45, 0.97)"],"datasource":"${DS_PROMETHEUS}","format":"none","gauge":{"maxValue":1,"minValue":0,"show":true,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":8,"w":12,"x":0,"y":14},"id":4,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to 
text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"fluentd_up","intervalFactor":2,"refId":"A","step":40}],"thresholds":"0,1","title":"Fluentd Up","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"avg"},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","fill":1,"gridPos":{"h":8,"w":12,"x":12,"y":14},"id":5,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"fluentd_retry_count{pluginCategory=\"output\",pluginId=\"apache_log\"}","intervalFactor":2,"metric":"fluentd_retry_count","refId":"A","step":4}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Fluentd retry count (apache)","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":true}],"yaxis":{"align":false,"alignLevel":null}}],"refresh":false,"schemaVersion":16,"style":"dark","tags":[],"templating":{"list":[]},"time":{"from":"2017-10-20T13:00:11.189Z","to":"2017-10-20T13:38:24.045Z"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone":"browser","title":"Rancher 
Components","uid":"wDHD1TYmz","version":1},"inputs":[{"name":"DS_PROMETHEUS","pluginId":"prometheus","type":"datasource","value":"Rancher-Monitoring"}],"overwrite":false} \ No newline at end of file diff --git a/charts/rancher-monitoring/v0.0.1/charts/grafana/dashboards/w_daemonset-dashboard.json b/charts/rancher-monitoring/v0.0.1/charts/grafana/dashboards/w_daemonset-dashboard.json new file mode 100644 index 0000000000000000000000000000000000000000..36202d5f66df95799f92e1d1d6cb2fdf5ca57fe4 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/grafana/dashboards/w_daemonset-dashboard.json @@ -0,0 +1 @@ +{"dashboard":{"__inputs":[{"name":"DS_PROMETHEUS","label":"Rancher-Monitoring","description":"","type":"datasource","pluginId":"prometheus","pluginName":"Prometheus"}],"__requires":[{"type":"grafana","id":"grafana","name":"Grafana","version":"5.3.0"},{"type":"panel","id":"graph","name":"Graph","version":"5.0.0"},{"type":"datasource","id":"prometheus","name":"Prometheus","version":"5.0.0"},{"type":"panel","id":"singlestat","name":"Singlestat","version":"5.0.0"}],"annotations":{"list":[{"builtIn":1,"datasource":"-- Grafana --","enable":true,"hide":true,"iconColor":"rgba(0, 211, 255, 1)","name":"Annotations & Alerts","type":"dashboard"}]},"description":"Kubernetes DaemonSet Overview","editable":true,"gnetId":6615,"graphTooltip":1,"id":null,"iteration":1543396055929,"links":[],"panels":[{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"datasource":"${DS_PROMETHEUS}","decimals":null,"editable":true,"format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":false},"gridPos":{"h":3,"w":8,"x":0,"y":0},"id":5,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to 
text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"max(kube_daemonset_status_desired_number_scheduled{daemonset=\"$daemonset_name\",namespace=\"$daemonset_namespace\"}) without (instance, pod)","format":"time_series","instant":false,"interval":"","intervalFactor":2,"legendFormat":"","refId":"A","step":600}],"thresholds":"","title":"Desired Replicas","transparent":false,"type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"avg"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"datasource":"${DS_PROMETHEUS}","editable":true,"format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":3,"w":8,"x":8,"y":0},"id":6,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"min(kube_daemonset_status_number_available{daemonset=\"$daemonset_name\",namespace=\"$daemonset_namespace\"}) without (instance, pod)","format":"time_series","intervalFactor":2,"refId":"A","step":600}],"thresholds":"","title":"Available 
Replicas","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"avg"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"datasource":"${DS_PROMETHEUS}","editable":true,"format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":3,"w":8,"x":16,"y":0},"id":2,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"max(kube_daemonset_metadata_generation{daemonset=\"$daemonset_name\",namespace=\"$daemonset_namespace\"}) without (instance, pod)","format":"time_series","intervalFactor":2,"refId":"A","step":600}],"thresholds":"","title":"Metadata Generation","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"avg"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["#299c46","rgba(237, 129, 40, 0.89)","#d44a3a"],"datasource":"${DS_PROMETHEUS}","decimals":2,"format":"s","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":6,"w":3,"x":0,"y":3},"id":11,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 
0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"time() - max(kube_daemonset_created{daemonset=~\"$daemonset_name\"})","format":"time_series","intervalFactor":1,"refId":"A"}],"thresholds":"","title":"DaemonSet Create Time","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"datasource":"${DS_PROMETHEUS}","editable":true,"format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":6,"w":7,"x":3,"y":3},"id":8,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"cores","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":true},"tableColumn":"","targets":[{"expr":"sum(rate(container_cpu_usage_seconds_total{namespace=\"$daemonset_namespace\",pod_name=~\"$daemonset_name.*\"}[2m]))","format":"time_series","intervalFactor":2,"refId":"A","step":600}],"thresholds":"","title":"Total CPU","type":"singlestat","valueFontSize":"110%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 
0.97)"],"datasource":"${DS_PROMETHEUS}","editable":true,"format":"bytes","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":6,"w":7,"x":10,"y":3},"id":9,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"80%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":true},"tableColumn":"","targets":[{"expr":"sum(container_memory_working_set_bytes{namespace=\"$daemonset_namespace\",pod_name=~\"$daemonset_name.*\", container_name!=\"POD\"})","format":"time_series","intervalFactor":2,"refId":"A","step":600}],"thresholds":"","title":"Total Memory","type":"singlestat","valueFontSize":"110%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"datasource":"${DS_PROMETHEUS}","editable":true,"format":"Bps","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":false},"gridPos":{"h":6,"w":7,"x":17,"y":3},"id":7,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":true},"tableColumn":"","targets":[{"expr":"sum(rate(container_network_transmit_bytes_total{namespace=\"$daemonset_namespace\",pod_name=~\"$daemonset_name.*\"}[2m])) + 
sum(rate(container_network_receive_bytes_total{namespace=\"$daemonset_namespace\",pod_name=~\"$daemonset_name.*\"}[2m]))","format":"time_series","intervalFactor":2,"refId":"A","step":600}],"thresholds":"","title":"Total Network","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"current"},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","editable":true,"error":false,"fill":1,"grid":{"threshold1Color":"rgba(216, 200, 27, 0.27)","threshold2Color":"rgba(234, 112, 112, 0.22)"},"gridPos":{"h":7,"w":12,"x":0,"y":9},"id":13,"isNew":false,"legend":{"alignAsTable":false,"avg":false,"current":true,"hideEmpty":false,"hideZero":false,"max":false,"min":false,"rightSide":false,"show":false,"total":false,"values":true},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum by (pod_name) (rate(container_cpu_usage_seconds_total{namespace=\"$daemonset_namespace\",pod_name=~\"$daemonset_name.*\"}[2m]))","format":"time_series","intervalFactor":2,"legendFormat":"{{pod_name}}","refId":"A","step":30}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU Usage","tooltip":{"msResolution":true,"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"decimals":null,"format":"short","label":"","logBase":1,"min":"0","show":true},{"format":"short","logBase":1,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","editable":true,"error":false,"fill":1,"grid":{"threshold1Color":"rgba(216, 200, 27, 0.27)","threshold2Color":"rgba(234, 112, 112, 
0.22)"},"gridPos":{"h":7,"w":12,"x":12,"y":9},"id":15,"isNew":false,"legend":{"alignAsTable":false,"avg":false,"current":false,"hideEmpty":false,"hideZero":false,"max":false,"min":false,"rightSide":false,"show":false,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum by (pod_name) (container_memory_working_set_bytes{namespace=\"$daemonset_namespace\",pod_name=~\"$daemonset_name.*\", container_name!=\"POD\"})","format":"time_series","interval":"10s","intervalFactor":1,"legendFormat":"{{ pod_name }}","refId":"A","step":15}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Usage","tooltip":{"msResolution":true,"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","logBase":1,"max":null,"min":"0","show":true},{"format":"short","logBase":1,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","decimals":0,"editable":true,"error":false,"fill":1,"grid":{"threshold1Color":"rgba(216, 200, 27, 0.27)","threshold2Color":"rgba(234, 112, 112, 0.22)"},"gridPos":{"h":9,"w":24,"x":0,"y":16},"id":1,"isNew":true,"legend":{"alignAsTable":true,"avg":false,"current":true,"hideEmpty":false,"hideZero":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":true},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"avg(kube_daemonset_status_number_ready{daemonset=\"$daemonset_name\",namespace=\"$daemonset_namespace\"}) without (instance, 
pod)","format":"time_series","intervalFactor":1,"legendFormat":"Ready","refId":"A"},{"expr":"avg(kube_daemonset_status_number_available{daemonset=\"$daemonset_name\",namespace=\"$daemonset_namespace\"}) without (instance, pod)","format":"time_series","hide":false,"intervalFactor":1,"legendFormat":"Available","refId":"C"},{"expr":"avg(kube_daemonset_status_number_unavailable{daemonset=\"$daemonset_name\",namespace=\"$daemonset_namespace\"}) without (instance, pod)","format":"time_series","hide":false,"intervalFactor":1,"legendFormat":"Unavailable","refId":"B"},{"expr":"avg(kube_daemonset_status_number_misscheduled{daemonset=\"$daemonset_name\",namespace=\"$daemonset_namespace\"}) without (instance, pod)","format":"time_series","hide":false,"intervalFactor":1,"legendFormat":"Misscheduled","refId":"D"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Replicas Status","tooltip":{"msResolution":true,"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"decimals":0,"format":"none","label":"","logBase":1,"show":true},{"format":"short","label":"","logBase":1,"show":false}],"yaxis":{"align":false,"alignLevel":null}}],"refresh":"30s","schemaVersion":16,"style":"dark","tags":[],"templating":{"list":[{"allValue":".*","current":{},"datasource":"${DS_PROMETHEUS}","hide":0,"includeAll":false,"label":"Namespace","multi":false,"name":"daemonset_namespace","options":[],"query":"label_values(kube_daemonset_metadata_generation, namespace)","refresh":1,"regex":"","skipUrlSync":false,"sort":0,"tagValuesQuery":null,"tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{},"datasource":"${DS_PROMETHEUS}","hide":0,"includeAll":false,"label":"DaemonSet","multi":false,"name":"daemonset_name","options":[],"query":"label_values(kube_daemonset_metadata_generation{namespace=\"$daemonset_namespace\"}, 
daemonset)","refresh":1,"regex":"","skipUrlSync":false,"sort":0,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-30m","to":"now"},"timepicker":{"hidden":false,"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone":"browser","title":"DaemonSet","uid":"gekRLzHiz","version":1},"inputs":[{"name":"DS_PROMETHEUS","pluginId":"prometheus","type":"datasource","value":"Rancher-Monitoring"}],"overwrite":false} \ No newline at end of file diff --git a/charts/rancher-monitoring/v0.0.1/charts/grafana/dashboards/w_deployment-dashboard.json b/charts/rancher-monitoring/v0.0.1/charts/grafana/dashboards/w_deployment-dashboard.json new file mode 100644 index 0000000000000000000000000000000000000000..1c63ffc364d5c98547ce4998d6540ecd959f4929 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/grafana/dashboards/w_deployment-dashboard.json @@ -0,0 +1 @@ +{"dashboard":{"__inputs":[{"name":"DS_PROMETHEUS","label":"Rancher-Monitoring","description":"","type":"datasource","pluginId":"prometheus","pluginName":"Prometheus"}],"__requires":[{"type":"grafana","id":"grafana","name":"Grafana","version":"5.3.0"},{"type":"panel","id":"graph","name":"Graph","version":"5.0.0"},{"type":"datasource","id":"prometheus","name":"Prometheus","version":"5.0.0"},{"type":"panel","id":"singlestat","name":"Singlestat","version":"5.0.0"}],"annotations":{"list":[{"builtIn":1,"datasource":"-- Grafana --","enable":true,"hide":true,"iconColor":"rgba(0, 211, 255, 1)","name":"Annotations & Alerts","type":"dashboard"}]},"editable":true,"gnetId":null,"graphTooltip":1,"id":null,"iteration":1543396071820,"links":[],"panels":[{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 
0.97)"],"datasource":"${DS_PROMETHEUS}","editable":true,"format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":5,"w":8,"x":0,"y":0},"id":8,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"cores","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":true},"tableColumn":"","targets":[{"expr":"sum(rate(container_cpu_usage_seconds_total{namespace=\"$deployment_namespace\",pod_name=~\"$deployment_name.*\"}[3m]))","intervalFactor":2,"refId":"A","step":600}],"thresholds":"","title":"CPU","type":"singlestat","valueFontSize":"110%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"avg"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"datasource":"${DS_PROMETHEUS}","editable":true,"format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":5,"w":8,"x":8,"y":0},"id":9,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"GB","postfixFontSize":"50%","prefix":"","prefixFontSize":"80%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":true},"tableColumn":"","targets":[{"expr":"sum(container_memory_usage_bytes{namespace=\"$deployment_namespace\",pod_name=~\"$deployment_name.*\"}) / 
1024^3","intervalFactor":2,"refId":"A","step":600}],"thresholds":"","title":"Memory","type":"singlestat","valueFontSize":"110%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"avg"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"datasource":"${DS_PROMETHEUS}","editable":true,"format":"Bps","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":false},"gridPos":{"h":5,"w":8,"x":16,"y":0},"id":7,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":true},"tableColumn":"","targets":[{"expr":"sum(rate(container_network_transmit_bytes_total{namespace=\"$deployment_namespace\",pod_name=~\"$deployment_name.*\"}[3m])) + sum(rate(container_network_receive_bytes_total{namespace=\"$deployment_namespace\",pod_name=~\"$deployment_name.*\"}[3m]))","intervalFactor":2,"refId":"A","step":600}],"thresholds":"","title":"Network","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"avg"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"datasource":"${DS_PROMETHEUS}","editable":true,"format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":false},"gridPos":{"h":3,"w":6,"x":0,"y":5},"id":5,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to 
text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"max(kube_deployment_spec_replicas{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}) without (instance, pod)","intervalFactor":2,"metric":"kube_deployment_spec_replicas","refId":"A","step":600}],"thresholds":"","title":"Desired Replicas","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"avg"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"datasource":"${DS_PROMETHEUS}","editable":true,"format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":3,"w":6,"x":6,"y":5},"id":6,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"min(kube_deployment_status_replicas_available{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}) without (instance, pod)","intervalFactor":2,"refId":"A","step":600}],"thresholds":"","title":"Available Replicas","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"avg"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(245, 54, 54, 
0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"datasource":"${DS_PROMETHEUS}","editable":true,"format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":3,"w":6,"x":12,"y":5},"id":3,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"max(kube_deployment_status_observed_generation{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}) without (instance, pod)","intervalFactor":2,"refId":"A","step":600}],"thresholds":"","title":"Observed Generation","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"avg"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"datasource":"${DS_PROMETHEUS}","editable":true,"format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":3,"w":6,"x":18,"y":5},"id":2,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 
193)","show":false},"tableColumn":"","targets":[{"expr":"max(kube_deployment_metadata_generation{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}) without (instance, pod)","intervalFactor":2,"refId":"A","step":600}],"thresholds":"","title":"Metadata Generation","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"avg"},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","editable":true,"error":false,"fill":1,"grid":{"threshold1Color":"rgba(216, 200, 27, 0.27)","threshold2Color":"rgba(234, 112, 112, 0.22)"},"gridPos":{"h":9,"w":24,"x":0,"y":8},"id":1,"isNew":true,"legend":{"alignAsTable":false,"avg":false,"current":false,"hideEmpty":false,"hideZero":false,"max":false,"min":false,"rightSide":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"max(kube_deployment_status_replicas{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}) without (instance, pod)","intervalFactor":2,"legendFormat":"current replicas","refId":"A","step":30},{"expr":"min(kube_deployment_status_replicas_available{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}) without (instance, pod)","intervalFactor":2,"legendFormat":"available","refId":"B","step":30},{"expr":"max(kube_deployment_status_replicas_unavailable{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}) without (instance, pod)","intervalFactor":2,"legendFormat":"unavailable","refId":"C","step":30},{"expr":"min(kube_deployment_status_replicas_updated{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}) without (instance, 
pod)","intervalFactor":2,"legendFormat":"updated","refId":"D","step":30},{"expr":"max(kube_deployment_spec_replicas{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}) without (instance, pod)","intervalFactor":2,"legendFormat":"desired","refId":"E","step":30}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Replicas","tooltip":{"msResolution":true,"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"none","label":"","logBase":1,"show":true},{"format":"short","label":"","logBase":1,"show":false}],"yaxis":{"align":false,"alignLevel":null}}],"schemaVersion":16,"style":"dark","tags":[],"templating":{"list":[{"allValue":".*","current":{},"datasource":"${DS_PROMETHEUS}","hide":0,"includeAll":false,"label":"Namespace","multi":false,"name":"deployment_namespace","options":[],"query":"label_values(kube_deployment_metadata_generation, namespace)","refresh":1,"regex":"","skipUrlSync":false,"sort":0,"tagValuesQuery":null,"tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{},"datasource":"${DS_PROMETHEUS}","hide":0,"includeAll":false,"label":"Deployment","multi":false,"name":"deployment_name","options":[],"query":"label_values(kube_deployment_metadata_generation{namespace=\"$deployment_namespace\"}, deployment)","refresh":1,"regex":"","skipUrlSync":false,"sort":0,"tagValuesQuery":"","tags":[],"tagsQuery":"deployment","type":"query","useTags":false}]},"time":{"from":"now-6h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone":"browser","title":"Deployment","uid":"kZdoIYxik","version":1},"inputs":[{"name":"DS_PROMETHEUS","pluginId":"prometheus","type":"datasource","value":"Rancher-Monitoring"}],"overwrite":false} \ No newline at end of file diff --git 
a/charts/rancher-monitoring/v0.0.1/charts/grafana/dashboards/w_pods-dashboard.json b/charts/rancher-monitoring/v0.0.1/charts/grafana/dashboards/w_pods-dashboard.json new file mode 100644 index 0000000000000000000000000000000000000000..6e465c31874eb917f3c1d3457932b3f92d195dee --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/grafana/dashboards/w_pods-dashboard.json @@ -0,0 +1 @@ +{"dashboard":{"__inputs":[{"name":"DS_PROMETHEUS","label":"Rancher-Monitoring","description":"","type":"datasource","pluginId":"prometheus","pluginName":"Prometheus"}],"__requires":[{"type":"grafana","id":"grafana","name":"Grafana","version":"5.3.0"},{"type":"panel","id":"graph","name":"Graph","version":"5.0.0"},{"type":"datasource","id":"prometheus","name":"Prometheus","version":"5.0.0"}],"annotations":{"list":[{"builtIn":1,"datasource":"-- Grafana --","enable":true,"hide":true,"iconColor":"rgba(0, 211, 255, 1)","name":"Annotations & Alerts","type":"dashboard"}]},"editable":true,"gnetId":null,"graphTooltip":1,"id":null,"iteration":1543396157762,"links":[],"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","editable":true,"error":false,"fill":1,"grid":{"threshold1Color":"rgba(216, 200, 27, 0.27)","threshold2Color":"rgba(234, 112, 112, 0.22)"},"gridPos":{"h":7,"w":24,"x":0,"y":0},"id":1,"isNew":false,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideEmpty":false,"hideZero":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":true},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum by(container_name) (container_memory_usage_bytes{pod_name=\"$pod\", container_name=~\"$container\", container_name!=\"POD\"})","interval":"10s","intervalFactor":1,"legendFormat":"Current: {{ container_name 
}}","metric":"container_memory_usage_bytes","refId":"A","step":15},{"expr":"kube_pod_container_resource_requests_memory_bytes{pod=\"$pod\", container=~\"$container\"}","interval":"10s","intervalFactor":2,"legendFormat":"Requested: {{ container }}","metric":"kube_pod_container_resource_requests_memory_bytes","refId":"B","step":20},{"expr":"kube_pod_container_resource_limits_memory_bytes{pod=\"$pod\", container=~\"$container\"}","interval":"10s","intervalFactor":2,"legendFormat":"Limit: {{ container }}","metric":"kube_pod_container_resource_limits_memory_bytes","refId":"C","step":20}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Usage","tooltip":{"msResolution":true,"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","logBase":1,"show":true},{"format":"short","logBase":1,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","editable":true,"error":false,"fill":1,"grid":{"threshold1Color":"rgba(216, 200, 27, 0.27)","threshold2Color":"rgba(234, 112, 112, 0.22)"},"gridPos":{"h":7,"w":24,"x":0,"y":7},"id":2,"isNew":false,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideEmpty":false,"hideZero":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":true},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sum by (container_name)(rate(container_cpu_usage_seconds_total{image!=\"\",container_name!=\"POD\",pod_name=\"$pod\"}[1m]))","intervalFactor":2,"legendFormat":"{{ container_name }}","refId":"A","step":30},{"expr":"kube_pod_container_resource_requests_cpu_cores{pod=\"$pod\", 
container=~\"$container\"}","interval":"10s","intervalFactor":2,"legendFormat":"Requested: {{ container }}","metric":"kube_pod_container_resource_requests_cpu_cores","refId":"B","step":20},{"expr":"kube_pod_container_resource_limits_cpu_cores{pod=\"$pod\", container=~\"$container\"}","interval":"10s","intervalFactor":2,"legendFormat":"Limit: {{ container }}","metric":"kube_pod_container_resource_limits_cpu_cores","refId":"C","step":20}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU Usage","tooltip":{"msResolution":true,"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","logBase":1,"show":true},{"format":"short","logBase":1,"show":true}],"yaxis":{"align":false,"alignLevel":null}},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","editable":true,"error":false,"fill":1,"grid":{"threshold1Color":"rgba(216, 200, 27, 0.27)","threshold2Color":"rgba(234, 112, 112, 0.22)"},"gridPos":{"h":7,"w":24,"x":0,"y":14},"id":3,"isNew":false,"legend":{"alignAsTable":true,"avg":true,"current":true,"hideEmpty":false,"hideZero":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":true},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"sort_desc(sum by (pod_name) (rate(container_network_receive_bytes_total{pod_name=\"$pod\"}[1m])))","intervalFactor":2,"legendFormat":"{{ pod_name }}","refId":"A","step":30}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Network 
I/O","tooltip":{"msResolution":true,"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","logBase":1,"show":true},{"format":"short","logBase":1,"show":true}],"yaxis":{"align":false,"alignLevel":null}}],"refresh":false,"schemaVersion":16,"style":"dark","tags":[],"templating":{"list":[{"allValue":".*","current":{},"datasource":"${DS_PROMETHEUS}","hide":0,"includeAll":true,"label":"Namespace","multi":false,"name":"namespace","options":[],"query":"label_values(kube_pod_info, namespace)","refresh":1,"regex":"","skipUrlSync":false,"sort":0,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{},"datasource":"${DS_PROMETHEUS}","hide":0,"includeAll":false,"label":"Pod","multi":false,"name":"pod","options":[],"query":"label_values(kube_pod_info{namespace=~\"$namespace\"}, pod)","refresh":1,"regex":"","skipUrlSync":false,"sort":0,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":".*","current":{},"datasource":"${DS_PROMETHEUS}","hide":0,"includeAll":true,"label":"Container","multi":false,"name":"container","options":[],"query":"label_values(kube_pod_container_info{namespace=~\"$namespace\", pod=\"$pod\"}, container)","refresh":1,"regex":"","skipUrlSync":false,"sort":0,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-6h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone":"browser","title":"Pods","uid":"XSOTSYxiz","version":1},"inputs":[{"name":"DS_PROMETHEUS","pluginId":"prometheus","type":"datasource","value":"Rancher-Monitoring"}],"overwrite":false} \ No newline at end of file diff --git a/charts/rancher-monitoring/v0.0.1/charts/grafana/dashboards/w_statefulset-dashboard.json 
b/charts/rancher-monitoring/v0.0.1/charts/grafana/dashboards/w_statefulset-dashboard.json new file mode 100644 index 0000000000000000000000000000000000000000..aa7ed7c81cbf0db6065325e2965dbe3471604f46 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/grafana/dashboards/w_statefulset-dashboard.json @@ -0,0 +1 @@ +{"dashboard":{"__inputs":[{"name":"DS_PROMETHEUS","label":"Rancher-Monitoring","description":"","type":"datasource","pluginId":"prometheus","pluginName":"Prometheus"}],"__requires":[{"type":"grafana","id":"grafana","name":"Grafana","version":"5.3.0"},{"type":"panel","id":"graph","name":"Graph","version":"5.0.0"},{"type":"datasource","id":"prometheus","name":"Prometheus","version":"5.0.0"},{"type":"panel","id":"singlestat","name":"Singlestat","version":"5.0.0"}],"annotations":{"list":[{"builtIn":1,"datasource":"-- Grafana --","enable":true,"hide":true,"iconColor":"rgba(0, 211, 255, 1)","name":"Annotations & Alerts","type":"dashboard"}]},"editable":true,"gnetId":null,"graphTooltip":1,"id":null,"iteration":1543396179999,"links":[],"panels":[{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"datasource":"${DS_PROMETHEUS}","editable":true,"format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":5,"w":8,"x":0,"y":0},"id":8,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"cores","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 
193)","show":true},"tableColumn":"","targets":[{"expr":"sum(rate(container_cpu_usage_seconds_total{namespace=\"$statefulset_namespace\",pod_name=~\"$statefulset_name.*\"}[3m]))","intervalFactor":2,"refId":"A","step":600}],"thresholds":"","title":"CPU","type":"singlestat","valueFontSize":"110%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"avg"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"datasource":"${DS_PROMETHEUS}","editable":true,"format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":5,"w":8,"x":8,"y":0},"id":9,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"GB","postfixFontSize":"50%","prefix":"","prefixFontSize":"80%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":true},"tableColumn":"","targets":[{"expr":"sum(container_memory_usage_bytes{namespace=\"$statefulset_namespace\",pod_name=~\"$statefulset_name.*\"}) / 1024^3","intervalFactor":2,"refId":"A","step":600}],"thresholds":"","title":"Memory","type":"singlestat","valueFontSize":"110%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"avg"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"datasource":"${DS_PROMETHEUS}","editable":true,"format":"Bps","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":false},"gridPos":{"h":5,"w":8,"x":16,"y":0},"id":7,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to 
text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":true},"tableColumn":"","targets":[{"expr":"sum(rate(container_network_transmit_bytes_total{namespace=\"$statefulset_namespace\",pod_name=~\"$statefulset_name.*\"}[3m])) + sum(rate(container_network_receive_bytes_total{namespace=\"$statefulset_namespace\",pod_name=~\"$statefulset_name.*\"}[3m]))","intervalFactor":2,"refId":"A","step":600}],"thresholds":"","title":"Network","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"avg"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"datasource":"${DS_PROMETHEUS}","editable":true,"format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":false},"gridPos":{"h":3,"w":6,"x":0,"y":5},"id":5,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"max(kube_statefulset_replicas{statefulset=\"$statefulset_name\",namespace=\"$statefulset_namespace\"}) without (instance, pod)","intervalFactor":2,"metric":"kube_statefulset_replicas","refId":"A","step":600}],"thresholds":"","title":"Desired 
Replicas","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"avg"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"datasource":"${DS_PROMETHEUS}","editable":true,"format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":3,"w":6,"x":6,"y":5},"id":6,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"min(kube_statefulset_status_replicas{statefulset=\"$statefulset_name\",namespace=\"$statefulset_namespace\"}) without (instance, pod)","intervalFactor":2,"refId":"A","step":600}],"thresholds":"","title":"Available Replicas","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"avg"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"datasource":"${DS_PROMETHEUS}","editable":true,"format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":3,"w":6,"x":12,"y":5},"id":3,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to 
text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"max(kube_statefulset_status_observed_generation{statefulset=\"$statefulset_name\",namespace=\"$statefulset_namespace\"}) without (instance, pod)","intervalFactor":2,"refId":"A","step":600}],"thresholds":"","title":"Observed Generation","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"avg"},{"cacheTimeout":null,"colorBackground":false,"colorValue":false,"colors":["rgba(245, 54, 54, 0.9)","rgba(237, 129, 40, 0.89)","rgba(50, 172, 45, 0.97)"],"datasource":"${DS_PROMETHEUS}","editable":true,"format":"none","gauge":{"maxValue":100,"minValue":0,"show":false,"thresholdLabels":false,"thresholdMarkers":true},"gridPos":{"h":3,"w":6,"x":18,"y":5},"id":2,"interval":null,"links":[],"mappingType":1,"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"maxDataPoints":100,"nullPointMode":"connected","nullText":null,"postfix":"","postfixFontSize":"50%","prefix":"","prefixFontSize":"50%","rangeMaps":[{"from":"null","text":"N/A","to":"null"}],"sparkline":{"fillColor":"rgba(31, 118, 189, 0.18)","full":false,"lineColor":"rgb(31, 120, 193)","show":false},"tableColumn":"","targets":[{"expr":"max(kube_statefulset_metadata_generation{statefulset=\"$statefulset_name\",namespace=\"$statefulset_namespace\"}) without (instance, pod)","intervalFactor":2,"refId":"A","step":600}],"thresholds":"","title":"Metadata 
Generation","type":"singlestat","valueFontSize":"80%","valueMaps":[{"op":"=","text":"N/A","value":"null"}],"valueName":"avg"},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"${DS_PROMETHEUS}","editable":true,"error":false,"fill":1,"grid":{"threshold1Color":"rgba(216, 200, 27, 0.27)","threshold2Color":"rgba(234, 112, 112, 0.22)"},"gridPos":{"h":9,"w":24,"x":0,"y":8},"id":1,"isNew":true,"legend":{"alignAsTable":false,"avg":false,"current":false,"hideEmpty":false,"hideZero":false,"max":false,"min":false,"rightSide":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"nullPointMode":"connected","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"min(kube_statefulset_status_replicas{statefulset=\"$statefulset_name\",namespace=\"$statefulset_namespace\"}) without (instance, pod)","intervalFactor":2,"legendFormat":"available","refId":"B","step":30},{"expr":"max(kube_statefulset_replicas{statefulset=\"$statefulset_name\",namespace=\"$statefulset_namespace\"}) without (instance, pod)","intervalFactor":2,"legendFormat":"desired","refId":"E","step":30}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Replicas","tooltip":{"msResolution":true,"shared":true,"sort":0,"value_type":"cumulative"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"none","label":"","logBase":1,"show":true},{"format":"short","label":"","logBase":1,"show":false}],"yaxis":{"align":false,"alignLevel":null}}],"schemaVersion":16,"style":"dark","tags":[],"templating":{"list":[{"allValue":".*","current":{},"datasource":"${DS_PROMETHEUS}","hide":0,"includeAll":false,"label":"Namespace","multi":false,"name":"statefulset_namespace","options":[],"query":"label_values(kube_statefulset_metadata_generation, 
namespace)","refresh":1,"regex":"","skipUrlSync":false,"sort":0,"tagValuesQuery":null,"tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{},"datasource":"${DS_PROMETHEUS}","hide":0,"includeAll":false,"label":"StatefulSet","multi":false,"name":"statefulset_name","options":[],"query":"label_values(kube_statefulset_metadata_generation{namespace=\"$statefulset_namespace\"}, statefulset)","refresh":1,"regex":"","skipUrlSync":false,"sort":0,"tagValuesQuery":"","tags":[],"tagsQuery":"statefulset","type":"query","useTags":false}]},"time":{"from":"now-6h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone":"browser","title":"StatefulSet","uid":"TDdTILbiz","version":1},"inputs":[{"name":"DS_PROMETHEUS","pluginId":"prometheus","type":"datasource","value":"Rancher-Monitoring"}],"overwrite":false} \ No newline at end of file diff --git a/charts/rancher-monitoring/v0.0.1/charts/grafana/templates/dashboards-configmap.yaml b/charts/rancher-monitoring/v0.0.1/charts/grafana/templates/dashboards-configmap.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6d2a01faa972ad74d349a91c8ec898d00e43ac65 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/grafana/templates/dashboards-configmap.yaml @@ -0,0 +1,24 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + labels: + app: {{ template "app.name" . }} + chart: {{ template "app.version" . }} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + name: {{ template "app.dashboards.fullname" . 
}} +data: +{{- if eq .Values.level "cluster" }} +{{ (.Files.Glob "dashboards/c_*.json").AsConfig | indent 2 }} +{{- end }} +{{ (.Files.Glob "dashboards/w_*.json").AsConfig | indent 2 }} + prometheus-datasource.json: |+ + { + "access": "proxy", + "basicAuth": false, + "editable": false, + "isDefault": true, + "name": "Rancher-Monitoring", + "type": "prometheus", + "url": "{{ .Values.prometheusDatasourceURL }}" + } diff --git a/charts/rancher-monitoring/v0.0.1/charts/grafana/templates/deployment.yaml b/charts/rancher-monitoring/v0.0.1/charts/grafana/templates/deployment.yaml new file mode 100755 index 0000000000000000000000000000000000000000..70474a2140de7eb8d1503711a3009c4efb465fd2 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/grafana/templates/deployment.yaml @@ -0,0 +1,175 @@ +apiVersion: {{ template "deployment_api_version" . }} +kind: Deployment +metadata: + labels: + app: {{ template "app.name" . }} + chart: {{ template "app.version" . }} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + name: {{ template "app.fullname" . }} +spec: + replicas: 1 + selector: + matchLabels: + app: {{ template "app.name" . }} + chart: {{ template "app.version" . }} + release: {{ .Release.Name }} + template: + metadata: + labels: + app: {{ template "app.name" . }} + chart: {{ template "app.version" . 
}} + release: {{ .Release.Name }} + spec: + initContainers: + - name: grafana-init-plugin-json-copy + image: {{ .Values.image.repository }}:{{ .Values.image.tag }} + volumeMounts: + - name: grafana-static-hooks + mountPath: /run.sh + subPath: copy-datasource-plugin-json.sh + - name: grafana-static-contents + mountPath: /host + - name: grafana-init-plugin-json-modify + image: {{ .Values.image.inits.tools.repository }}:{{ .Values.image.inits.tools.tag }} + command: + - /usr/bin/modify-datasource-plugin-json.sh + volumeMounts: + - name: grafana-static-hooks + mountPath: /usr/bin/modify-datasource-plugin-json.sh + subPath: modify-datasource-plugin-json.sh + - name: grafana-static-contents + mountPath: /host + containers: + - name: grafana + image: {{ .Values.image.repository }}:{{ .Values.image.tag }} + env: + - name: GF_AUTH_BASIC_ENABLED + value: "true" + - name: GF_AUTH_ANONYMOUS_ENABLED + value: "true" + - name: GF_SECURITY_ADMIN_USER + valueFrom: + secretKeyRef: + name: {{ template "app.fullname" . }} + key: user + - name: GF_SECURITY_ADMIN_PASSWORD + valueFrom: + secretKeyRef: + name: {{ template "app.fullname" . 
}} + key: password +{{- if .Values.extraVars }} +{{ toYaml .Values.extraVars | indent 8 }} +{{- end }} + volumeMounts: + - name: grafana-storage + mountPath: /var/lib/grafana + - name: grafana-static-contents + mountPath: /usr/share/grafana/public/app/plugins/datasource/prometheus/plugin.json + subPath: grafana/plugin.json + {{- if .Values.mountGrafanaConfig }} + - name: grafana-config + mountPath: /etc/grafana + {{- end }} + ports: + - name: web + containerPort: 3000 + protocol: TCP + readinessProbe: + httpGet: + path: /api/health + port: 3000 + periodSeconds: 1 + timeoutSeconds: 1 + successThreshold: 1 + failureThreshold: 10 + {{- if .Values.resources }} + resources: +{{ toYaml .Values.resources | indent 12 }} + {{- end }} + - name: grafana-watcher + image: {{ .Values.grafanaWatcher.repository }}:{{ .Values.grafanaWatcher.tag }} + args: + - '--grafana-url=http://127.0.0.1:3000' + - '--watch-dir=/var/grafana-dashboards' + {{- range .Values.dashboardConfigmaps }} + - '--watch-dir=/var/additional-dashboards/{{ . }}' + {{- end }} + env: + - name: GRAFANA_USER + valueFrom: + secretKeyRef: + name: {{ template "app.fullname" . }} + key: user + - name: GRAFANA_PASSWORD + valueFrom: + secretKeyRef: + name: {{ template "app.fullname" . }} + key: password + {{- if .Values.grafanaWatcher.resources }} + resources: +{{ toYaml .Values.grafanaWatcher.resources | indent 12 }} + {{- end }} + volumeMounts: + - name: grafana-dashboards + mountPath: /var/grafana-dashboards + {{- range .Values.dashboardConfigmaps }} + - name: {{ . }} + mountPath: /var/additional-dashboards/{{ . 
}} + {{- end }} + - name: grafana-proxy + image: {{ .Values.grafanaProxy.repository }}:{{ .Values.grafanaProxy.tag }} + args: + - nginx + - -g + - daemon off; + - -c + - /nginx/nginx.conf + ports: + - name: http + containerPort: 80 + protocol: TCP + volumeMounts: + - mountPath: /nginx/ + name: grafana-nginx + {{- if .Values.nodeSelector }} + nodeSelector: + {{ toYaml .Values.nodeSelector | indent 4 }} + {{- end }} + {{- if .Values.enabledRBAC }} + serviceAccountName: {{ .Values.serviceAccountName }} + {{- end }} + {{- if .Values.tolerations }} + tolerations: +{{ toYaml .Values.tolerations | indent 8 }} + {{- end }} + volumes: + - name: grafana-static-hooks + configMap: + name: {{ template "app.hooks.fullname" . }} + defaultMode: 0777 + - name: grafana-static-contents + emptyDir: {} + - name: grafana-storage + {{- if or .Values.storageSpec .Values.persistence.enabled }} + persistentVolumeClaim: + claimName: {{ template "app.fullname" . }} + {{- else }} + emptyDir: {} + {{- end }} + - name: grafana-nginx + configMap: + defaultMode: 438 + items: + - key: nginx.conf + mode: 438 + path: nginx.conf + name: {{ template "app.nginx.fullname" . }} + - name: grafana-dashboards + configMap: + name: {{ template "app.dashboards.fullname" . }} + {{- range .Values.dashboardConfigmaps }} + - name: {{ . }} + configMap: + name: {{ . }} + {{- end }} diff --git a/charts/rancher-monitoring/v0.0.1/charts/grafana/templates/hooks-configmap.yaml b/charts/rancher-monitoring/v0.0.1/charts/grafana/templates/hooks-configmap.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5cd8cb29e01d3571f6b3bb015bf504eabfb3cd73 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/grafana/templates/hooks-configmap.yaml @@ -0,0 +1,46 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + labels: + app: {{ template "app.name" . }} + chart: {{ template "app.version" . }} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + name: {{ template "app.hooks.fullname" . 
}} +data: + copy-datasource-plugin-json.sh: |- + #!/bin/bash + + srcpath="/usr/share/grafana/public/app/plugins/datasource/prometheus/plugin.json" + dstpath="/host/grafana/raw-plugin.json" + + if [[ -f "$srcpath" ]] && [[ -d /host ]]; then + mkdir -p /host/grafana + cp -f "$srcpath" "$dstpath" || exit 1 # a failed copy must fail the init container + cat "$srcpath" + + exit 0 + fi + + exit 1 + + modify-datasource-plugin-json.sh: |- + #!/bin/sh + + srcpath="/host/grafana/raw-plugin.json" + dstpath="/host/grafana/plugin.json" + + if [ -f "$srcpath" ] && [ -d /host ]; then + mkdir -p /host/grafana + + token=$(cat /var/run/secrets/kubernetes.io/serviceaccount/token) + + K8S_BEARERTOKEN="Bearer $token" jq 'to_entries | . + [{"key":"routes","value":[{"path":"api/v1","url":"{{ .Values.prometheusDatasourceURL }}/api/v1","headers":[{"name":"Authorization","content":env.K8S_BEARERTOKEN}]}]}] | from_entries' "$srcpath" > "$dstpath" || exit 1 + echo "generated $dstpath" # do not print the file: it embeds the ServiceAccount bearer token + + exit 0 + fi + + exit 1 + + diff --git a/charts/rancher-monitoring/v0.0.1/charts/grafana/templates/metrics-service.yaml b/charts/rancher-monitoring/v0.0.1/charts/grafana/templates/metrics-service.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e04643d00eeb9994c83ab54ef962f7f71ea19aea --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/grafana/templates/metrics-service.yaml @@ -0,0 +1,20 @@ +apiVersion: v1 +kind: Service +metadata: + name: expose-grafana-metrics + labels: + app: {{ template "app.name" . }} + chart: {{ template "app.version" . }} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + monitoring.cattle.io: "true" +spec: + type: ClusterIP + selector: + app: {{ template "app.name" . }} + chart: {{ template "app.version" .
}} + release: {{ .Release.Name }} + ports: + - name: web + port: 3000 + targetPort: web \ No newline at end of file diff --git a/charts/rancher-monitoring/v0.0.1/charts/grafana/templates/nginx-configmap.yaml b/charts/rancher-monitoring/v0.0.1/charts/grafana/templates/nginx-configmap.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dfe8092aabf96f111486253bca5ef665833d2c65 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/grafana/templates/nginx-configmap.yaml @@ -0,0 +1,68 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ template "app.nginx.fullname" . }} + labels: + app: {{ template "app.name" . }} + chart: {{ template "app.version" . }} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + component: nginx +data: + nginx.conf: |- + user nginx; + worker_processes auto; + error_log /dev/null warn; + pid /var/run/nginx.pid; + + events { + worker_connections 1024; + } + + http { + include /etc/nginx/mime.types; + + log_format main '[$time_local - $status] $remote_addr - $remote_user $request ($http_referer)'; + + server { + listen 80; + + access_log off; + + gzip on; + gzip_min_length 1k; + gzip_comp_level 2; + gzip_types text/plain application/javascript application/x-javascript text/css application/xml text/javascript image/jpeg image/gif image/png; + gzip_vary on; + gzip_disable "MSIE [1-6]\."; + + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + + location /api/dashboards { + proxy_pass http://localhost:3000; + } + + location /api/search { + proxy_pass http://localhost:3000; + + sub_filter_types application/json; + sub_filter_once off; + sub_filter '"url":"/d' '"url":"d'; + } + + location / { + proxy_pass http://localhost:3000/; + + sub_filter_types text/html; + sub_filter_once off; + sub_filter '"appSubUrl":""' '"appSubUrl":"."'; + sub_filter '"url":"/' '"url":"./'; + 
sub_filter ':"/avatar/' ':"avatar/'; + } + + } + + } diff --git a/charts/rancher-monitoring/v0.0.1/charts/grafana/templates/pvc.yaml b/charts/rancher-monitoring/v0.0.1/charts/grafana/templates/pvc.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1443336ff981fab96221fe976135c03f72d972d3 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/grafana/templates/pvc.yaml @@ -0,0 +1,24 @@ +{{- if or .Values.storageSpec .Values.persistence.enabled -}} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + labels: + app: {{ template "app.name" . }} + chart: {{ template "app.version" . }} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + name: {{ template "app.fullname" . }} +spec: + {{- if .Values.storageSpec }} +{{ toYaml .Values.storageSpec | indent 2 }} + {{- else }} + accessModes: + - {{ default "ReadWriteOnce" .Values.persistence.accessMode }} + {{ if and .Values.persistence.storageClass (ne "default" .Values.persistence.storageClass) }} + storageClassName: {{ .Values.persistence.storageClass }} + {{ end }} + resources: + requests: + storage: {{ .Values.persistence.size | quote }} + {{- end }} +{{- end -}} diff --git a/charts/rancher-monitoring/v0.0.1/charts/grafana/templates/secret.yaml b/charts/rancher-monitoring/v0.0.1/charts/grafana/templates/secret.yaml new file mode 100644 index 0000000000000000000000000000000000000000..658146a26ece6ace7da3b578668c23f3f8a96525 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/grafana/templates/secret.yaml @@ -0,0 +1,17 @@ +apiVersion: v1 +kind: Secret +metadata: + labels: + app: {{ template "app.name" . }} + chart: {{ template "app.version" . }} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + name: {{ template "app.fullname" . 
}} +type: Opaque +data: + user: {{ .Values.adminUser | b64enc | quote }} + {{- if .Values.adminPassword }} + password: {{ .Values.adminPassword | b64enc | quote }} + {{- else }} + password: {{ randAlphaNum 10 | b64enc | quote }} + {{- end }} diff --git a/charts/rancher-monitoring/v0.0.1/charts/grafana/templates/service.yaml b/charts/rancher-monitoring/v0.0.1/charts/grafana/templates/service.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fd26aa2abdff557a3e61ff447bae1252d769fc2a --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/grafana/templates/service.yaml @@ -0,0 +1,21 @@ +apiVersion: v1 +kind: Service +metadata: + name: access-grafana + labels: + app: {{ template "app.name" . }} + chart: {{ template "app.version" . }} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + kubernetes.io/cluster-service: "true" +spec: + type: ClusterIP + sessionAffinity: ClientIP + selector: + app: {{ template "app.name" . }} + chart: {{ template "app.version" . }} + release: {{ .Release.Name }} + ports: + - name: http + port: 80 + targetPort: http diff --git a/charts/rancher-monitoring/v0.0.1/charts/grafana/templates/servicemonitor.yaml b/charts/rancher-monitoring/v0.0.1/charts/grafana/templates/servicemonitor.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bdb2f5c1bb0bb67c0ec973dee23a73530a530b7a --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/grafana/templates/servicemonitor.yaml @@ -0,0 +1,27 @@ +apiVersion: {{ template "operator_api_version" . }} +kind: ServiceMonitor +metadata: + labels: + app: {{ template "app.name" . }} + chart: {{ template "app.version" . }} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + source: rancher-monitoring + {{- if .Values.serviceMonitor.labels }} +{{ toYaml .Values.serviceMonitor.labels | indent 4 }} + {{- end }} + name: {{ template "app.fullname" . }} +spec: + jobLabel: grafana + selector: + matchLabels: + app: {{ template "app.name" . 
}} + chart: {{ template "app.version" . }} + release: {{ .Release.Name }} + monitoring.cattle.io: "true" + namespaceSelector: + matchNames: + - {{ .Release.Namespace | quote }} + endpoints: + - port: web + interval: 30s diff --git a/charts/rancher-monitoring/v0.0.1/charts/grafana/values.yaml b/charts/rancher-monitoring/v0.0.1/charts/grafana/values.yaml new file mode 100755 index 0000000000000000000000000000000000000000..94345d9b543094929e500554189f9d869e61df55 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/grafana/values.yaml @@ -0,0 +1,106 @@ +level: cluster + +enabledRBAC: true + +## Name of an existing ServiceAccount for the Grafana pod (used when enabledRBAC is true) +## +serviceAccountName: "" + +enabledPSP: true + +## CRD apiGroup +## +apiGroup: "monitoring.coreos.com" + +## Node labels for Grafana pod assignment +## Ref: https://kubernetes.io/docs/user-guide/node-selection/ +## +nodeSelector: {} + +## List of tolerations for use with node taints +## Ref: https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/ +## +tolerations: [] + # - key: "key" + # operator: "Equal" + # value: "value" + # effect: "NoSchedule" + +serviceMonitor: + ## Custom Labels to be added to ServiceMonitor + ## + labels: {} + +## Pass extra environment variables to the Grafana container.
+## +# extraVars: +# - name: EXTRA_VAR_1 +# value: extra-var-value-1 +# - name: EXTRA_VAR_2 +# value: extra-var-value-2 +extraVars: [] + +adminUser: "admin" +adminPassword: "admin" + +## Grafana Docker image +## +image: + repository: grafana/grafana + tag: 5.3.0 + inits: + tools: + repository: maiwj/curl + tag: 7.56.1-r0 + +storageSpec: {} +# storageClassName: default +# accessModes: +# - ReadWriteOnce +# resources: +# requests: +# storage: 2Gi +# selector: {} + +## Easy way to create persistent data +## +persistence: {} +# enabled: true +# storageClass: gluster +# accessMode: "ReadWriteOnce" +# size: 50Gi + +## Resource limits & requests +## Ref: https://kubernetes.io/docs/user-guide/compute-resources/ +resources: {} + # limits: + # memory: 200Mi + # cpu: 200m + # requests: + # memory: 100Mi + # cpu: 100m + + +## A list of additional configmaps that contain -dashboard.json and/or -datasource.json files +## that should be imported into grafana. +dashboardConfigmaps: [] + +prometheusDatasourceURL: "" + +grafanaProxy: + repository: nginx + tag: 1.15.2 + +grafanaWatcher: + repository: quay.io/coreos/grafana-watcher + tag: v0.0.8 + + ## Resource limits & requests + ## Ref: https://kubernetes.io/docs/user-guide/compute-resources/ + resources: {} + #requests: + # memory: "16Mi" + # cpu: "50m" + #limits: + # memory: "32Mi" + # cpu: "100m" diff --git a/charts/rancher-monitoring/v0.0.1/charts/metric-expression-cluster/Chart.yaml b/charts/rancher-monitoring/v0.0.1/charts/metric-expression-cluster/Chart.yaml new file mode 100755 index 0000000000000000000000000000000000000000..8b321b80b9ac17aec40fce343f48aa884c8e8746 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/metric-expression-cluster/Chart.yaml @@ -0,0 +1,9 @@ +apiVersion: v1 +description: Creates Metrics CRD of Rancher monitoring graph +engine: gotpl +maintainers: +- name: aiwantaozi + email: michelia.feng@gmail.com +name: metric-expression-cluster +version: 0.0.1 + diff --git
a/charts/rancher-monitoring/v0.0.1/charts/metric-expression-cluster/templates/expressionapiserver.yaml b/charts/rancher-monitoring/v0.0.1/charts/metric-expression-cluster/templates/expressionapiserver.yaml new file mode 100644 index 0000000000000000000000000000000000000000..29680868a2e877c8e0325d86e15f259d902a83ed --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/metric-expression-cluster/templates/expressionapiserver.yaml @@ -0,0 +1,105 @@ +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: apiserver-request-latency-milliseconds-avg + labels: + app: metric-expression + component: apiserver + details: "false" + level: cluster + metric: request-latency-milliseconds-avg + source: rancher-monitoring +spec: + expression: avg(apiserver_request_latencies_sum / apiserver_request_latencies_count) + by (instance) /1e+06 + legendFormat: '[[instance]]' + description: apiserver request latency milliseconds avg +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: apiserver-request-latency-milliseconds-avg-details + labels: + app: metric-expression + component: apiserver + details: "true" + level: cluster + metric: request-latency-milliseconds-avg + source: rancher-monitoring +spec: + expression: avg(apiserver_request_latencies_sum / apiserver_request_latencies_count) + by (instance, verb) /1e+06 + legendFormat: '[[verb]]([[instance]])' + description: apiserver request latency milliseconds avg +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: apiserver-request-count-sum-rate + labels: + app: metric-expression + component: apiserver + details: "false" + graph: request-count + level: cluster + metric: request-count-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(apiserver_request_count[5m])) by (instance) + legendFormat: '[[instance]]' + description: apiserver request count sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: 
apiserver-request-count-sum-rate-details + labels: + app: metric-expression + component: apiserver + details: "true" + graph: request-count + level: cluster + metric: request-count-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(apiserver_request_count[5m])) by (instance, + code) + legendFormat: '[[code]]([[instance]])' + description: apiserver request count sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: apiserver-request-error-count-sum-rate + labels: + app: metric-expression + component: apiserver + details: "false" + graph: request-count + level: cluster + metric: request-error-count-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(apiserver_request_count{instance=~"$instance", code!~"2.."}[5m])) + by (instance) + legendFormat: '[[instance]]' + description: apiserver request error count sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: apiserver-request-error-count-sum-rate-details + labels: + app: metric-expression + component: apiserver + details: "true" + graph: request-count + level: cluster + metric: request-error-count-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(apiserver_request_count{instance=~"$instance", code!~"2.."}[5m])) + by (instance, code) + legendFormat: '[[code]]([[instance]])' + description: apiserver request error count sum rate +--- diff --git a/charts/rancher-monitoring/v0.0.1/charts/metric-expression-cluster/templates/expressioncluster.yaml b/charts/rancher-monitoring/v0.0.1/charts/metric-expression-cluster/templates/expressioncluster.yaml new file mode 100644 index 0000000000000000000000000000000000000000..46b2e6e4d024acf368c59917fe23989a281ed128 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/metric-expression-cluster/templates/expressioncluster.yaml @@ -0,0 +1,572 @@ +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: cluster-disk-io-reads-bytes-sum-rate + 
labels: + app: metric-expression + component: cluster + details: "false" + graph: disk-io + level: cluster + metric: disk-io-reads-bytes-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(node_disk_read_bytes_total[5m])) + * 8 / 1024 + legendFormat: Read + description: cluster disk io reads bytes sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: cluster-disk-io-reads-bytes-sum-rate-details + labels: + app: metric-expression + component: cluster + details: "true" + graph: disk-io + level: cluster + metric: disk-io-reads-bytes-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(node_disk_read_bytes_total[5m])) + by (instance) * 8 / 1024 + legendFormat: Read([[instance]]) + description: cluster disk io reads bytes sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: cluster-network-transmit-bytes-sum + labels: + app: metric-expression + component: cluster + details: "false" + graph: network-io + level: cluster + metric: network-transmit-bytes-sum + source: rancher-monitoring +spec: + expression: sum(rate(node_network_transmit_bytes_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*"}[5m])) + * 8 / 1024 + legendFormat: Transmit + description: cluster network transmit bytes sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: cluster-network-transmit-bytes-sum-details + labels: + app: metric-expression + component: cluster + details: "true" + graph: network-io + level: cluster + metric: network-transmit-bytes-sum + source: rancher-monitoring +spec: + expression: sum(rate(node_network_transmit_bytes_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*"}[5m])) + by (instance) * 8 / 1024 + legendFormat: Transmit([[instance]]) + description: cluster network transmit bytes sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: cluster-cpu-load-5 + labels: + app:
metric-expression + component: cluster + details: "false" + graph: cpu-load + level: cluster + metric: cpu-load-5 + source: rancher-monitoring +spec: + expression: sum(node_load5) / count(node_cpu_seconds_total{mode="system"}) + + legendFormat: Load5 + description: cluster cpu load 5 +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: cluster-cpu-load-5-details + labels: + app: metric-expression + component: cluster + details: "true" + graph: cpu-load + level: cluster + metric: cpu-load-5 + source: rancher-monitoring +spec: + expression: sum(node_load5) by (instance) / count(node_cpu_seconds_total{mode="system"}) + by (instance) + legendFormat: Load5([[instance]]) + description: cluster cpu load 5 +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: cluster-cpu-load-1 + labels: + app: metric-expression + component: cluster + details: "false" + graph: cpu-load + level: cluster + metric: cpu-load-1 + source: rancher-monitoring +spec: + expression: sum(node_load1) / count(node_cpu_seconds_total{mode="system"}) + + legendFormat: Load1 + description: cluster cpu load 1 +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: cluster-cpu-load-1-details + labels: + app: metric-expression + component: cluster + details: "true" + graph: cpu-load + level: cluster + metric: cpu-load-1 + source: rancher-monitoring +spec: + expression: sum(node_load1) by (instance) / count(node_cpu_seconds_total{mode="system"}) + by (instance) + legendFormat: Load1([[instance]]) + description: cluster cpu load 1 +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: cluster-disk-io-writes-bytes-sum-rate + labels: + app: metric-expression + component: cluster + details: "false" + graph: disk-io + level: cluster + metric: disk-io-writes-bytes-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(node_disk_written_bytes_total[5m])) + * 8 / 1024 + legendFormat: Write + 
description: cluster disk io writes bytes sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: cluster-disk-io-writes-bytes-sum-rate-details + labels: + app: metric-expression + component: cluster + details: "true" + graph: disk-io + level: cluster + metric: disk-io-writes-bytes-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(node_disk_written_bytes_total[5m])) + by (instance) * 8 / 1024 + legendFormat: Write([[instance]]) + description: cluster disk io writes bytes sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: cluster-fs-usage-percent + labels: + app: metric-expression + component: cluster + details: "false" + level: cluster + metric: fs-usage-percent + source: rancher-monitoring +spec: + expression: (sum(node_filesystem_size_bytes{device!="rootfs"}) + - sum(node_filesystem_free_bytes{device!="rootfs"}) + ) / sum(node_filesystem_size_bytes{device!="rootfs"}) + + legendFormat: Disk usage + description: cluster fs usage percent +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: cluster-fs-usage-percent-details + labels: + app: metric-expression + component: cluster + details: "true" + level: cluster + metric: fs-usage-percent + source: rancher-monitoring +spec: + expression: (sum(node_filesystem_size_bytes{device!="rootfs"}) + by (instance) - sum(node_filesystem_free_bytes{device!="rootfs"}) + by (instance)) / sum(node_filesystem_size_bytes{device!="rootfs"}) + by (instance) + legendFormat: '[[instance]]' + description: cluster fs usage percent +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: cluster-network-receive-errors-sum + labels: + app: metric-expression + component: cluster + details: "false" + graph: network-packet + level: cluster + metric: network-receive-errors-sum + source: rancher-monitoring +spec: + expression: 
sum(rate(node_network_receive_errs_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*"}[5m])) + + legendFormat: Receive errors + description: cluster network receive errors sum +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: cluster-network-receive-errors-sum-details + labels: + app: metric-expression + component: cluster + details: "true" + graph: network-packet + level: cluster + metric: network-receive-errors-sum + source: rancher-monitoring +spec: + expression: sum(rate(node_network_receive_errs_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*"}[5m])) + by (instance) + legendFormat: Receive errors([[instance]]) + description: cluster network receive errors sum +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: cluster-cpu-load-15 + labels: + app: metric-expression + component: cluster + details: "false" + graph: cpu-load + level: cluster + metric: cpu-load-15 + source: rancher-monitoring +spec: + expression: sum(node_load15) / count(node_cpu_seconds_total{mode="system"}) + + legendFormat: Load15 + description: cluster cpu load 15 +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: cluster-cpu-load-15-details + labels: + app: metric-expression + component: cluster + details: "true" + graph: cpu-load + level: cluster + metric: cpu-load-15 + source: rancher-monitoring +spec: + expression: sum(node_load15) by (instance) / count(node_cpu_seconds_total{mode="system"}) + by (instance) + legendFormat: Load15([[instance]]) + description: cluster cpu load 15 +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: cluster-network-receive-bytes-sum + labels: + app: metric-expression + component: cluster + details: "false" + graph: network-io + level: cluster + metric: network-receive-bytes-sum + source: rancher-monitoring +spec: + expression: 
sum(rate(node_network_receive_bytes_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*"}[5m])) + * 8 / 1024 + legendFormat: Receive + description: cluster network receive bytes sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: cluster-network-receive-bytes-sum-details + labels: + app: metric-expression + component: cluster + details: "true" + graph: network-io + level: cluster + metric: network-receive-bytes-sum + source: rancher-monitoring +spec: + expression: sum(rate(node_network_receive_bytes_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*"}[5m])) + by (instance) * 8 / 1024 + legendFormat: Receive([[instance]]) + description: cluster network receive bytes sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: cluster-network-receive-packets-sum + labels: + app: metric-expression + component: cluster + details: "false" + graph: network-packet + level: cluster + metric: network-receive-packets-sum + source: rancher-monitoring +spec: + expression: sum(rate(node_network_receive_packets_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*"}[5m])) + + legendFormat: Receive packets + description: cluster network receive packets sum +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: cluster-network-receive-packets-sum-details + labels: + app: metric-expression + component: cluster + details: "true" + graph: network-packet + level: cluster + metric: network-receive-packets-sum + source: rancher-monitoring +spec: + expression: sum(rate(node_network_receive_packets_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*"}[5m])) + by (instance) + legendFormat: Receive packets([[instance]]) + description: cluster network receive packets sum +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: cluster-network-transmit-errors-sum + labels: + app: metric-expression + component: cluster + details: "false" + graph: 
network-packet + level: cluster + metric: network-transmit-errors-sum + source: rancher-monitoring +spec: + expression: sum(rate(node_network_transmit_errs_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*"}[5m])) + + legendFormat: Transmit errors + description: cluster network transmit errors sum +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: cluster-network-transmit-errors-sum-details + labels: + app: metric-expression + component: cluster + details: "true" + graph: network-packet + level: cluster + metric: network-transmit-errors-sum + source: rancher-monitoring +spec: + expression: sum(rate(node_network_transmit_errs_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*"}[5m])) + by (instance) + legendFormat: Transmit errors([[instance]]) + description: cluster network transmit errors sum +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: cluster-network-receive-packets-dropped-sum + labels: + app: metric-expression + component: cluster + details: "false" + graph: network-packet + level: cluster + metric: network-receive-packets-dropped-sum + source: rancher-monitoring +spec: + expression: sum(rate(node_network_receive_drop_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*"}[5m])) + + legendFormat: Receive dropped + description: cluster network receive packets dropped sum +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: cluster-network-receive-packets-dropped-sum-details + labels: + app: metric-expression + component: cluster + details: "true" + graph: network-packet + level: cluster + metric: network-receive-packets-dropped-sum + source: rancher-monitoring +spec: + expression: sum(rate(node_network_receive_drop_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*"}[5m])) + by (instance) + legendFormat: Receive dropped([[instance]]) + description: cluster network receive packets dropped sum +--- +kind: MonitorMetric +apiVersion: 
management.cattle.io/v3 +metadata: + name: cluster-network-transmit-packets-dropped-sum + labels: + app: metric-expression + component: cluster + details: "false" + graph: network-packet + level: cluster + metric: network-transmit-packets-dropped-sum + source: rancher-monitoring +spec: + expression: sum(rate(node_network_transmit_drop_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*"}[5m])) + + legendFormat: Transmit dropped + description: cluster network transmit packets dropped sum +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: cluster-network-transmit-packets-dropped-sum-details + labels: + app: metric-expression + component: cluster + details: "true" + graph: network-packet + level: cluster + metric: network-transmit-packets-dropped-sum + source: rancher-monitoring +spec: + expression: sum(rate(node_network_transmit_drop_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*"}[5m])) + by (instance) + legendFormat: Transmit dropped([[instance]]) + description: cluster network transmit packets dropped sum +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: cluster-network-transmit-packets-sum + labels: + app: metric-expression + component: cluster + details: "false" + graph: network-packet + level: cluster + metric: network-transmit-packets-sum + source: rancher-monitoring +spec: + expression: sum(rate(node_network_transmit_packets_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*"}[5m])) + + legendFormat: Transmit packets + description: cluster network transmit packets sum +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: cluster-network-transmit-packets-sum-details + labels: + app: metric-expression + component: cluster + details: "true" + graph: network-packet + level: cluster + metric: network-transmit-packets-sum + source: rancher-monitoring +spec: + expression: 
sum(rate(node_network_transmit_packets_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*"}[5m])) + by (instance) + legendFormat: Transmit packets([[instance]]) + description: cluster network transmit packets sum +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: cluster-cpu-usage-seconds-sum-rate + labels: + app: metric-expression + component: cluster + details: "false" + level: cluster + metric: cpu-usage-seconds-sum-rate + source: rancher-monitoring +spec: + expression: 1 - (avg(irate(node_cpu_seconds_total{mode="idle"}[5m]))) + legendFormat: CPU usage + description: cluster cpu usage seconds sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: cluster-cpu-usage-seconds-sum-rate-details + labels: + app: metric-expression + component: cluster + details: "true" + level: cluster + metric: cpu-usage-seconds-sum-rate + source: rancher-monitoring +spec: + expression: 1 - (avg(irate(node_cpu_seconds_total{mode="idle"}[5m])) by (instance)) + legendFormat: '[[instance]]' + description: cluster cpu usage seconds sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: cluster-memory-usage-percent + labels: + app: metric-expression + component: cluster + details: "false" + level: cluster + metric: memory-usage-percent + source: rancher-monitoring +spec: + expression: 1 - sum(node_memory_MemAvailable_bytes) + / sum(node_memory_MemTotal_bytes) + legendFormat: Memory usage + description: cluster memory usage percent +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: cluster-memory-usage-percent-details + labels: + app: metric-expression + component: cluster + details: "true" + level: cluster + metric: memory-usage-percent + source: rancher-monitoring +spec: + expression: 1 - sum(node_memory_MemAvailable_bytes) by (instance) + / sum(node_memory_MemTotal_bytes) by (instance) + legendFormat: '[[instance]]' + description: cluster memory usage 
percent +--- diff --git a/charts/rancher-monitoring/v0.0.1/charts/metric-expression-cluster/templates/expressioncontainer.yaml b/charts/rancher-monitoring/v0.0.1/charts/metric-expression-cluster/templates/expressioncontainer.yaml new file mode 100644 index 0000000000000000000000000000000000000000..81f67b189bded804cad49453e8bb1d27705cbf74 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/metric-expression-cluster/templates/expressioncontainer.yaml @@ -0,0 +1,314 @@ +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: container-cpu-cfs-throttled-seconds-sum-rate + labels: + app: metric-expression + component: container + details: "false" + graph: container-cpu-usage-details + level: project + source: rancher-monitoring +spec: + expression: sum(rate(container_cpu_cfs_throttled_seconds_total{namespace=~"$namespace",pod_name=~"$podName", + container_name=~"$containerName"}[5m])) by (container_name) + legendFormat: CPU cfs throttled + description: container cpu cfs throttled seconds sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: container-cpu-cfs-throttled-seconds-sum-rate-details + labels: + app: metric-expression + component: container + details: "true" + graph: container-cpu-usage-details + level: project + source: rancher-monitoring +spec: + expression: sum(rate(container_cpu_cfs_throttled_seconds_total{namespace=~"$namespace",pod_name=~"$podName", + container_name=~"$containerName"}[5m])) by (container_name) + legendFormat: CPU cfs throttled([[container_name]]) + description: container cpu cfs throttled seconds sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: container-cpu-usage-seconds-sum-rate + labels: + app: metric-expression + component: container + details: "false" + graph: container-cpu-usage + level: project + source: rancher-monitoring +spec: + expression: 
sum(rate(container_cpu_usage_seconds_total{namespace=~"$namespace",pod_name=~"$podName", + container_name=~"$containerName"}[5m])) by (container_name) + legendFormat: CPU usage + description: container cpu usage seconds sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: container-cpu-usage-seconds-sum-rate-details + labels: + app: metric-expression + component: container + details: "true" + graph: container-cpu-usage + level: project + source: rancher-monitoring +spec: + expression: sum(rate(container_cpu_usage_seconds_total{namespace=~"$namespace",pod_name=~"$podName", + container_name=~"$containerName"}[5m])) by (container_name) + legendFormat: CPU usage([[container_name]]) + description: container cpu usage seconds sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: container-cpu-system-seconds-sum-rate + labels: + app: metric-expression + component: container + details: "false" + graph: container-cpu-usage-details + level: project + source: rancher-monitoring +spec: + expression: sum(rate(container_cpu_system_seconds_total{namespace=~"$namespace",pod_name=~"$podName", + container_name=~"$containerName"}[5m])) by (container_name) + legendFormat: CPU system seconds + description: container cpu system seconds sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: container-cpu-system-seconds-sum-rate-details + labels: + app: metric-expression + component: container + details: "true" + graph: container-cpu-usage-details + level: project + source: rancher-monitoring +spec: + expression: sum(rate(container_cpu_system_seconds_total{namespace=~"$namespace",pod_name=~"$podName", + container_name=~"$containerName"}[5m])) by (container_name) + legendFormat: CPU system seconds([[container_name]]) + description: container cpu system seconds sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: 
container-cpu-user-seconds-sum-rate + labels: + app: metric-expression + component: container + details: "false" + graph: container-cpu-usage-details + level: project + source: rancher-monitoring +spec: + expression: sum(rate(container_cpu_user_seconds_total{namespace=~"$namespace",pod_name=~"$podName", + container_name=~"$containerName"}[5m])) by (container_name) + legendFormat: CPU user seconds + description: container cpu user seconds sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: container-cpu-user-seconds-sum-rate-details + labels: + app: metric-expression + component: container + details: "true" + graph: container-cpu-usage-details + level: project + source: rancher-monitoring +spec: + expression: sum(rate(container_cpu_user_seconds_total{namespace=~"$namespace",pod_name=~"$podName", + container_name=~"$containerName"}[5m])) by (container_name) + legendFormat: CPU user seconds([[container_name]]) + description: container cpu user seconds sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: container-memory-usage-percent + labels: + app: metric-expression + component: container + details: "false" + level: project + metric: memory-usage-percent + source: rancher-monitoring +spec: + expression: sum(container_memory_working_set_bytes{namespace=~"$namespace", pod_name=~"$podName", + container_name=~"$containerName"}) by (container_name) / sum(label_join(kube_pod_container_resource_limits_memory_bytes{namespace=~"$namespace", + pod=~"$podName", container=~"$containerName"},"container_name", "", "container")) + by (container_name) + legendFormat: Memory + description: container memory usage percent +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: container-memory-usage-percent-details + labels: + app: metric-expression + component: container + details: "true" + level: project + metric: memory-usage-percent + source: rancher-monitoring +spec: + 
expression: sum(container_memory_working_set_bytes{namespace=~"$namespace", pod_name=~"$podName", + container_name=~"$containerName"}) by (container_name) / sum(label_join(kube_pod_container_resource_limits_memory_bytes{namespace=~"$namespace", + pod=~"$podName", container=~"$containerName"},"container_name", "", "container")) + by (container_name) + legendFormat: Memory([[container_name]]) + description: container memory usage percent +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: container-memory-usage-bytes-sum + labels: + app: metric-expression + component: container + details: "false" + level: project + metric: memory-usage-bytes-sum + source: rancher-monitoring +spec: + expression: sum(container_memory_working_set_bytes{name!~"POD", namespace=~"$namespace",pod_name=~"$podName", + container_name=~"$containerName"}) by (container_name) + legendFormat: Memory usage + description: container memory usage bytes sum +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: container-memory-usage-bytes-sum-details + labels: + app: metric-expression + component: container + details: "true" + level: project + metric: memory-usage-bytes-sum + source: rancher-monitoring +spec: + expression: sum(container_memory_working_set_bytes{name!~"POD", namespace=~"$namespace",pod_name=~"$podName", + container_name=~"$containerName"}) by (container_name) + legendFormat: Memory usage([[container_name]]) + description: container memory usage bytes sum +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: container-fs-bytes-sum + labels: + app: metric-expression + component: container + details: "false" + level: project + metric: fs-bytes-sum + source: rancher-monitoring +spec: + expression: sum(container_fs_usage_bytes{namespace=~"$namespace", pod_name=~"$podName", + container_name=~"$containerName"}) by (container_name) + legendFormat: Filesystem usage + description: container fs bytes sum +--- 
+kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: container-fs-bytes-sum-details + labels: + app: metric-expression + component: container + details: "true" + level: project + metric: fs-bytes-sum + source: rancher-monitoring +spec: + expression: sum(container_fs_usage_bytes{namespace=~"$namespace", pod_name=~"$podName", + container_name=~"$containerName"}) by (container_name) + legendFormat: Filesystem usage([[container_name]]) + description: container fs bytes sum +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: container-disk-io-writes-bytes-sum-rate + labels: + app: metric-expression + component: container + details: "false" + graph: disk-io + level: project + metric: disk-io-writes-bytes-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(container_fs_writes_bytes_total{namespace=~"$namespace",pod_name=~"$podName", + container_name=~"$containerName"}[5m])) by (container_name) * 8 / 1024 + legendFormat: Write + description: container disk io writes bytes sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: container-disk-io-writes-bytes-sum-rate-details + labels: + app: metric-expression + component: container + details: "true" + graph: disk-io + level: project + metric: disk-io-writes-bytes-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(container_fs_writes_bytes_total{namespace=~"$namespace",pod_name=~"$podName", + container_name=~"$containerName"}[5m])) by (container_name) * 8 / 1024 + legendFormat: Write([[container_name]]) + description: container disk io writes bytes sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: container-disk-io-reads-bytes-sum-rate + labels: + app: metric-expression + component: container + details: "false" + graph: disk-io + level: project + metric: disk-io-reads-bytes-sum-rate + source: rancher-monitoring +spec: + expression: 
sum(rate(container_fs_reads_bytes_total{namespace=~"$namespace",pod_name=~"$podName", + container_name=~"$containerName"}[5m])) by (container_name) * 8 / 1024 + legendFormat: Read + description: container disk io reads bytes sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: container-disk-io-reads-bytes-sum-rate-details + labels: + app: metric-expression + component: container + details: "true" + graph: disk-io + level: project + metric: disk-io-reads-bytes-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(container_fs_reads_bytes_total{namespace=~"$namespace",pod_name=~"$podName", + container_name=~"$containerName"}[5m])) by (container_name) * 8 / 1024 + legendFormat: Read([[container_name]]) + description: container disk io reads bytes sum rate +--- diff --git a/charts/rancher-monitoring/v0.0.1/charts/metric-expression-cluster/templates/expressioncontrollermanager.yaml b/charts/rancher-monitoring/v0.0.1/charts/metric-expression-cluster/templates/expressioncontrollermanager.yaml new file mode 100644 index 0000000000000000000000000000000000000000..566a5b609b85af0e36c878a36784ddb5992176a6 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/metric-expression-cluster/templates/expressioncontrollermanager.yaml @@ -0,0 +1,288 @@ +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: controllermanager-volumes-depth + labels: + app: metric-expression + component: controllermanager + details: "false" + level: cluster + metric: volumes-depth + source: rancher-monitoring +spec: + expression: sum(volumes_depth) + legendFormat: Volumes depth + description: controllermanager volumes depth +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: controllermanager-volumes-depth-details + labels: + app: metric-expression + component: controllermanager + details: "true" + level: cluster + metric: volumes-depth + source: rancher-monitoring +spec: + expression: 
sum(volumes_depth) by (instance) + legendFormat: Volumes depth([[instance]]) + description: controllermanager volumes depth +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: controllermanager-deployment-depth + labels: + app: metric-expression + component: controllermanager + details: "false" + level: cluster + metric: deployment-depth + source: rancher-monitoring +spec: + expression: sum(deployment_depth) + legendFormat: Deployment depth + description: controllermanager deployment depth +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: controllermanager-deployment-depth-details + labels: + app: metric-expression + component: controllermanager + details: "true" + level: cluster + metric: deployment-depth + source: rancher-monitoring +spec: + expression: sum(deployment_depth) by (instance) + legendFormat: Deployment depth([[instance]]) + description: controllermanager deployment depth +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: controllermanager-replicaset-depth + labels: + app: metric-expression + component: controllermanager + details: "false" + level: cluster + metric: replicaset-depth + source: rancher-monitoring +spec: + expression: sum(replicaset_depth) + legendFormat: Replicaset depth + description: controllermanager replicaset depth +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: controllermanager-replicaset-depth-details + labels: + app: metric-expression + component: controllermanager + details: "true" + level: cluster + metric: replicaset-depth + source: rancher-monitoring +spec: + expression: sum(replicaset_depth) by (instance) + legendFormat: Replicaset depth([[instance]]) + description: controllermanager replicaset depth +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: controllermanager-service-depth + labels: + app: metric-expression + component: controllermanager + details: "false" +
level: cluster + metric: service-depth + source: rancher-monitoring +spec: + expression: sum(service_depth) + legendFormat: Service depth + description: controllermanager service depth +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: controllermanager-service-depth-details + labels: + app: metric-expression + component: controllermanager + details: "true" + level: cluster + metric: service-depth + source: rancher-monitoring +spec: + expression: sum(service_depth) by (instance) + legendFormat: Service depth([[instance]]) + description: controllermanager service depth +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: controllermanager-serviceaccount-depth + labels: + app: metric-expression + component: controllermanager + details: "false" + level: cluster + metric: serviceaccount-depth + source: rancher-monitoring +spec: + expression: sum(serviceaccount_depth) + legendFormat: Serviceaccount depth + description: controllermanager serviceaccount depth +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: controllermanager-serviceaccount-depth-details + labels: + app: metric-expression + component: controllermanager + details: "true" + level: cluster + metric: serviceaccount-depth + source: rancher-monitoring +spec: + expression: sum(serviceaccount_depth) by (instance) + legendFormat: Serviceaccount depth([[instance]]) + description: controllermanager serviceaccount depth +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: controllermanager-endpoint-depth + labels: + app: metric-expression + component: controllermanager + details: "false" + level: cluster + metric: endpoint-depth + source: rancher-monitoring +spec: + expression: sum(endpoint_depth) + legendFormat: Endpoint depth + description: controllermanager endpoint depth +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: controllermanager-endpoint-depth-details + 
labels: + app: metric-expression + component: controllermanager + details: "true" + level: cluster + metric: endpoint-depth + source: rancher-monitoring +spec: + expression: sum(endpoint_depth) by (instance) + legendFormat: Endpoint depth([[instance]]) + description: controllermanager endpoint depth +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: controllermanager-daemonset-depth + labels: + app: metric-expression + component: controllermanager + details: "false" + level: cluster + metric: daemonset-depth + source: rancher-monitoring +spec: + expression: sum(daemonset_depth) + legendFormat: Daemonset depth + description: controllermanager daemonset depth +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: controllermanager-daemonset-depth-details + labels: + app: metric-expression + component: controllermanager + details: "true" + level: cluster + metric: daemonset-depth + source: rancher-monitoring +spec: + expression: sum(daemonset_depth) by (instance) + legendFormat: Daemonset depth([[instance]]) + description: controllermanager daemonset depth +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: controllermanager-deployment-depth + labels: + app: metric-expression + component: controllermanager + details: "false" + level: cluster + metric: deployment-depth + source: rancher-monitoring +spec: + expression: sum(deployment_depth) + legendFormat: Deployment depth + description: controllermanager deployment depth +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: controllermanager-deployment-depth-details + labels: + app: metric-expression + component: controllermanager + details: "true" + level: cluster + metric: deployment-depth + source: rancher-monitoring +spec: + expression: sum(deployment_depth) by (instance) + legendFormat: Deployment depth([[instance]]) + description: controllermanager deployment depth +--- +kind: MonitorMetric 
+apiVersion: management.cattle.io/v3 +metadata: + name: controllermanager-statefulset-depth + labels: + app: metric-expression + component: controllermanager + details: "false" + level: cluster + metric: statefulset-depth + source: rancher-monitoring +spec: + expression: sum(statefulset_depth) + legendFormat: Statefulset depth + description: controllermanager statefulset depth +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: controllermanager-statefulset-depth-details + labels: + app: metric-expression + component: controllermanager + details: "true" + level: cluster + metric: statefulset-depth + source: rancher-monitoring +spec: + expression: sum(statefulset_depth) by (instance) + legendFormat: Statefulset depth([[instance]]) + description: controllermanager statefulset depth +--- diff --git a/charts/rancher-monitoring/v0.0.1/charts/metric-expression-cluster/templates/expressionetcd.yaml b/charts/rancher-monitoring/v0.0.1/charts/metric-expression-cluster/templates/expressionetcd.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b0523b13e5cd6ef83de04470ed9c1f8402892068 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/metric-expression-cluster/templates/expressionetcd.yaml @@ -0,0 +1,781 @@ +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: etcd-server-failed-proposal + labels: + app: metric-expression + component: etcd + details: "false" + level: cluster + metric: server-failed-proposal + source: rancher-monitoring +spec: + expression: count(up{job="exporter-kube-etcd-cluster-monitoring"}) by (instance) + legendFormat: Failed proposal + description: etcd Server failed proposal +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: etcd-server-failed-proposal-details + labels: + app: metric-expression + component: etcd + details: "true" + level: cluster + metric: server-failed-proposal + source: rancher-monitoring +spec: + expression:
count(up{job="exporter-kube-etcd-cluster-monitoring"}) by (instance) + legendFormat: Failed proposal + description: etcd Server failed proposal +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: etcd-server-leader-changes-seen-sum-increase + labels: + app: metric-expression + component: etcd + details: "false" + level: cluster + metric: server-leader-changes-seen-sum-increase + source: rancher-monitoring +spec: + expression: max(etcd_server_leader_changes_seen_total) + legendFormat: Number of leader changes per hour + description: etcd server leader changes seen sum increase +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: etcd-server-leader-changes-seen-sum-increase-details + labels: + app: metric-expression + component: etcd + details: "true" + level: cluster + metric: server-leader-changes-seen-sum-increase + source: rancher-monitoring +spec: + expression: max(etcd_server_leader_changes_seen_total) + legendFormat: Number of leader changes per hour + description: etcd server leader changes seen sum increase +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: etcd-grpc-client-receive-bytes-sum-rate + labels: + app: metric-expression + component: etcd + details: "false" + graph: rpc-client-traffic + level: cluster + metric: grpc-client-receive-bytes-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(etcd_network_client_grpc_received_bytes_total[5m])) * + 8 / 1024 + legendFormat: Client traffic in + description: etcd grpc client receive bytes sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: etcd-grpc-client-receive-bytes-sum-rate-details + labels: + app: metric-expression + component: etcd + details: "true" + graph: rpc-client-traffic + level: cluster + metric: grpc-client-receive-bytes-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(etcd_network_client_grpc_received_bytes_total[5m])) by 
(instance) + * 8 / 1024 + legendFormat: Client traffic in([[instance]]) + description: etcd grpc client receive bytes sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: etcd-db-bytes-sum + labels: + app: metric-expression + component: etcd + details: "false" + level: cluster + metric: db-bytes-sum + source: rancher-monitoring +spec: + expression: sum(etcd_debugging_mvcc_db_total_size_in_bytes) + legendFormat: DB size + description: etcd db bytes sum +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: etcd-db-bytes-sum-details + labels: + app: metric-expression + component: etcd + details: "true" + level: cluster + metric: db-bytes-sum + source: rancher-monitoring +spec: + expression: sum(etcd_debugging_mvcc_db_total_size_in_bytes) by (instance) + legendFormat: DB size([[instance]]) + description: etcd db bytes sum +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: etcd-grpc-client-transmit-bytes-sum-rate + labels: + app: metric-expression + component: etcd + details: "false" + graph: rpc-client-traffic + level: cluster + metric: grpc-client-transmit-bytes-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(etcd_network_client_grpc_sent_bytes_total[5m])) + legendFormat: Client traffic out + description: etcd grpc client transmit bytes sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: etcd-grpc-client-transmit-bytes-sum-rate-details + labels: + app: metric-expression + component: etcd + details: "true" + graph: rpc-client-traffic + level: cluster + metric: grpc-client-transmit-bytes-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(etcd_network_client_grpc_sent_bytes_total[5m])) by (instance) + legendFormat: Client traffic out([[instance]]) + description: etcd grpc client transmit bytes sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: 
etcd-server-leader-sum + labels: + app: metric-expression + component: etcd + details: "false" + level: cluster + metric: server-leader-sum + source: rancher-monitoring +spec: + expression: max(etcd_server_has_leader) + legendFormat: Has leader + description: etcd server leader sum +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: etcd-server-leader-sum-details + labels: + app: metric-expression + component: etcd + details: "true" + level: cluster + metric: server-leader-sum + source: rancher-monitoring +spec: + expression: max(etcd_server_has_leader) + legendFormat: Has leader + description: etcd server leader sum +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: etcd-server-proposals-committed-sum-increase + labels: + app: metric-expression + component: etcd + details: "false" + graph: proposal + level: cluster + metric: server-proposals-committed-sum-increase + source: rancher-monitoring +spec: + expression: sum(increase(etcd_server_proposals_committed_total[5m])) + legendFormat: Proposal commit rate + description: etcd server proposals committed sum increase +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: etcd-server-proposals-committed-sum-increase-details + labels: + app: metric-expression + component: etcd + details: "true" + graph: proposal + level: cluster + metric: server-proposals-committed-sum-increase + source: rancher-monitoring +spec: + expression: sum(increase(etcd_server_proposals_committed_total[5m])) by (instance) + legendFormat: Proposal commit rate([[instance]]) + description: etcd server proposals committed sum increase +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: etcd-server-proposals-applied-sum-increase + labels: + app: metric-expression + component: etcd + details: "false" + graph: proposal + level: cluster + metric: server-proposals-applied-sum-increase + source: rancher-monitoring +spec: + expression: 
sum(increase(etcd_server_proposals_applied_total[5m])) + legendFormat: Proposals applied + description: etcd server proposals applied sum increase +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: etcd-server-proposals-applied-sum-increase-details + labels: + app: metric-expression + component: etcd + details: "true" + graph: proposal + level: cluster + metric: server-proposals-applied-sum-increase + source: rancher-monitoring +spec: + expression: sum(increase(etcd_server_proposals_applied_total[5m])) by (instance) + legendFormat: proposals applied([[instance]]) + description: etcd server proposals applied sum increase +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: etcd-server-proposals-failed-sum-increase + labels: + app: metric-expression + component: etcd + details: "false" + graph: proposal + level: cluster + metric: server-proposals-failed-sum-increase + source: rancher-monitoring +spec: + expression: sum(increase(etcd_server_proposals_failed_total[5m])) + legendFormat: Proposals failed + description: etcd server proposals failed sum increase +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: etcd-server-proposals-failed-sum-increase-details + labels: + app: metric-expression + component: etcd + details: "true" + graph: proposal + level: cluster + metric: server-proposals-failed-sum-increase + source: rancher-monitoring +spec: + expression: sum(increase(etcd_server_proposals_failed_total[5m])) by (instance) + legendFormat: proposals failed([[instance]]) + description: etcd server proposals failed sum increase +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: etcd-server-proposals-pending-sum-increase + labels: + app: metric-expression + component: etcd + details: "false" + graph: proposal + level: cluster + metric: server-proposals-pending-sum-increase + source: rancher-monitoring +spec: + expression: 
sum(etcd_server_proposals_pending) + legendFormat: Proposals pending + description: etcd server proposals pending sum increase +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: etcd-server-proposals-pending-sum-increase-details + labels: + app: metric-expression + component: etcd + details: "true" + graph: proposal + level: cluster + metric: server-proposals-pending-sum-increase + source: rancher-monitoring +spec: + expression: sum(etcd_server_proposals_pending) by (instance) + legendFormat: Proposals pending([[instance]]) + description: etcd server proposals pending sum increase +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: etcd-disk-wal-fsync-duration-seconds-sum-quantile + labels: + app: metric-expression + component: etcd + details: "false" + graph: sync-duration + level: cluster + metric: disk-wal-fsync-duration-seconds-sum-quantile + source: rancher-monitoring +spec: + expression: sum(histogram_quantile(0.99, rate(etcd_disk_wal_fsync_duration_seconds_bucket[5m]))) + + legendFormat: WAL fsync + description: etcd disk wal fsync duration seconds sum quantile +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: etcd-disk-wal-fsync-duration-seconds-sum-quantile-details + labels: + app: metric-expression + component: etcd + details: "true" + graph: sync-duration + level: cluster + metric: disk-wal-fsync-duration-seconds-sum-quantile + source: rancher-monitoring +spec: + expression: sum(histogram_quantile(0.99, rate(etcd_disk_wal_fsync_duration_seconds_bucket[5m]))) + by (instance) + legendFormat: WAL fsync([[instance]]) + description: etcd disk wal fsync duration seconds sum quantile +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: etcd-grpc-request-error-percent + labels: + app: metric-expression + component: etcd + details: "false" + level: cluster + metric: grpc-request-error-percent + source:
rancher-monitoring +spec: + expression: sum(rate(grpc_server_handled_total{grpc_code!="OK"}[5m])) / sum(rate(grpc_server_handled_total[5m])) + + legendFormat: RPC failed rate + description: etcd grpc request error percent +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: etcd-grpc-request-error-percent-details + labels: + app: metric-expression + component: etcd + details: "true" + level: cluster + metric: grpc-request-error-percent + source: rancher-monitoring +spec: + expression: sum(rate(grpc_server_handled_total{grpc_code!="OK"}[5m])) by (instance) + / sum(rate(grpc_server_handled_total[5m])) by (instance) + legendFormat: RPC failed rate([[instance]]) + description: etcd grpc request error percent +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: etcd-disk-commit-duration-seconds-sum-quantile + labels: + app: metric-expression + component: etcd + details: "false" + graph: sync-duration + level: cluster + metric: disk-commit-duration-seconds-sum-quantile + source: rancher-monitoring +spec: + expression: sum(histogram_quantile(0.99, rate(etcd_disk_backend_commit_duration_seconds_bucket[5m]))) + + legendFormat: DB fsync + description: etcd disk commit duration seconds sum quantile +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: etcd-disk-commit-duration-seconds-sum-quantile-details + labels: + app: metric-expression + component: etcd + details: "true" + graph: sync-duration + level: cluster + metric: disk-commit-duration-seconds-sum-quantile + source: rancher-monitoring +spec: + expression: sum(histogram_quantile(0.99, rate(etcd_disk_backend_commit_duration_seconds_bucket[5m]))) + by (instance) + legendFormat: DB fsync([[instance]]) + description: etcd disk commit duration seconds sum quantile +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: etcd-grpc-request-slow-quantile + labels: + app: metric-expression + component: etcd + details:
"false" + level: cluster + metric: grpc-request-slow-quantile + source: rancher-monitoring +spec: + expression: sum(histogram_quantile(0.99, sum(rate(grpc_server_handling_seconds_bucket{grpc_type="unary"}[5m])) by (le))) + + legendFormat: Request slow + description: etcd grpc request slow quantile +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: etcd-grpc-request-slow-quantile-details + labels: + app: metric-expression + component: etcd + details: "true" + level: cluster + metric: grpc-request-slow-quantile + source: rancher-monitoring +spec: + expression: sum(histogram_quantile(0.99, sum(rate(grpc_server_handling_seconds_bucket{grpc_type="unary"}[5m])) by (instance, le))) + by (instance) + legendFormat: Request slow([[instance]]) + description: etcd grpc request slow quantile +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: etcd-active-watch-stream + labels: + app: metric-expression + component: etcd + details: "false" + graph: etcd-stream + level: cluster + metric: active-watch-stream + source: rancher-monitoring +spec: + expression: sum(grpc_server_started_total{grpc_service="etcdserverpb.Watch",grpc_type="bidi_stream"}) + - sum(grpc_server_handled_total{grpc_service="etcdserverpb.Watch",grpc_type="bidi_stream"}) + + legendFormat: Watch streams + description: Etcd watch stream +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: etcd-active-watch-stream-details + labels: + app: metric-expression + component: etcd + details: "true" + graph: etcd-stream + level: cluster + metric: active-watch-stream + source: rancher-monitoring +spec: + expression: sum(grpc_server_started_total{grpc_service="etcdserverpb.Watch",grpc_type="bidi_stream"}) + by (instance) - sum(grpc_server_handled_total{grpc_service="etcdserverpb.Watch",grpc_type="bidi_stream"}) + by (instance) + legendFormat: Watch streams([[instance]]) + description: Etcd watch stream +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3
+metadata: + name: etcd-lease-watch-stream + labels: + app: metric-expression + component: etcd + details: "false" + graph: etcd-stream + level: cluster + metric: lease-watch-stream + source: rancher-monitoring +spec: + expression: sum(grpc_server_started_total{grpc_service="etcdserverpb.Lease",grpc_type="bidi_stream"}) + - sum(grpc_server_handled_total{grpc_service="etcdserverpb.Lease",grpc_type="bidi_stream"}) + + legendFormat: Lease watch stream + description: Etcd lease stream +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: etcd-lease-watch-stream-details + labels: + app: metric-expression + component: etcd + details: "true" + graph: etcd-stream + level: cluster + metric: lease-watch-stream + source: rancher-monitoring +spec: + expression: sum(grpc_server_started_total{grpc_service="etcdserverpb.Lease",grpc_type="bidi_stream"}) + by (instance) - sum(grpc_server_handled_total{grpc_service="etcdserverpb.Lease",grpc_type="bidi_stream"}) + by (instance) + legendFormat: Lease watch stream([[instance]]) + description: Etcd lease stream +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: etcd-peer-traffic-in + labels: + app: metric-expression + component: etcd + details: "false" + graph: etcd-peer-traffic + level: cluster + metric: peer-traffic-in + source: rancher-monitoring +spec: + expression: sum(rate(etcd_network_peer_received_bytes_total[5m])) * 8 / 1024 + legendFormat: Traffic in + description: Etcd peer traffic in +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: etcd-peer-traffic-in-details + labels: + app: metric-expression + component: etcd + details: "true" + graph: etcd-peer-traffic + level: cluster + metric: peer-traffic-in + source: rancher-monitoring +spec: + expression: sum(rate(etcd_network_peer_received_bytes_total[5m])) by (instance) + * 8 / 1024 + legendFormat: Traffic in([[instance]]) + description: Etcd peer traffic in +--- +kind: MonitorMetric 
+apiVersion: management.cattle.io/v3 +metadata: + name: etcd-peer-traffic-out + labels: + app: metric-expression + component: etcd + details: "false" + graph: etcd-peer-traffic + level: cluster + metric: peer-traffic-out + source: rancher-monitoring +spec: + expression: sum(rate(etcd_network_peer_sent_bytes_total[5m])) + legendFormat: Traffic out + description: Etcd peer traffic out +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: etcd-peer-traffic-out-details + labels: + app: metric-expression + component: etcd + details: "true" + graph: etcd-peer-traffic + level: cluster + metric: peer-traffic-out + source: rancher-monitoring +spec: + expression: sum(rate(etcd_network_peer_sent_bytes_total[5m])) by (instance) + legendFormat: Traffic out([[instance]]) + description: Etcd peer traffic out +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: etcd-proposal-failure-rate + labels: + app: metric-expression + component: etcd + details: "false" + graph: proposal + level: cluster + source: rancher-monitoring +spec: + expression: sum(rate(etcd_server_proposals_failed_total[5m])) + legendFormat: Proposal failure + description: Proposal Failure Rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: etcd-proposal-failure-rate-details + labels: + app: metric-expression + component: etcd + details: "true" + graph: proposal + level: cluster + source: rancher-monitoring +spec: + expression: sum(rate(etcd_server_proposals_failed_total[5m])) by (instance) + legendFormat: Proposal failure([[instance]]) + description: Proposal Failure Rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: etcd-rpc-rate + labels: + app: metric-expression + component: etcd + details: "false" + graph: rpc-rate + level: cluster + source: rancher-monitoring +spec: + expression: sum(rate(grpc_server_started_total{grpc_type="unary"}[5m])) + legendFormat: RPC rate + description: 
rpc-rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: etcd-rpc-rate-details + labels: + app: metric-expression + component: etcd + details: "true" + graph: rpc-rate + level: cluster + source: rancher-monitoring +spec: + expression: sum(rate(grpc_server_started_total{grpc_type="unary"}[5m])) by (instance) + legendFormat: Rpc rate([[instance]]) + description: rpc-rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: etcd-rpc-rate-failed + labels: + app: metric-expression + component: etcd + details: "false" + graph: rpc-rate + level: cluster + source: rancher-monitoring +spec: + expression: sum(rate(grpc_server_handled_total{grpc_type="unary",grpc_code!="OK"}[5m])) + + legendFormat: Rpc failed rate + description: rpc-rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: etcd-rpc-rate-failed-details + labels: + app: metric-expression + component: etcd + details: "true" + graph: rpc-rate + level: cluster + source: rancher-monitoring +spec: + expression: sum(rate(grpc_server_handled_total{grpc_type="unary",grpc_code!="OK"}[5m])) + by (instance) + legendFormat: Rpc failed rate([[instance]]) + description: rpc-rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: etcd-latency-distributions-of-commit-called-by-backend + labels: + app: metric-expression + component: etcd + details: "false" + graph: disk-operate + level: cluster + source: rancher-monitoring +spec: + expression: sum(rate(etcd_disk_backend_commit_duration_seconds_sum[1m])) + legendFormat: Commit latency called by backend + description: The latency distributions of commit called by backend +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: etcd-latency-distributions-of-commit-called-by-backend-details + labels: + app: metric-expression + component: etcd + details: "true" + graph: disk-operate + level: cluster + source: rancher-monitoring +spec: + 
expression: sum(rate(etcd_disk_backend_commit_duration_seconds_sum[1m])) by (instance) + legendFormat: Commit latency called by backend([[instance]]) + description: The latency distributions of commit called by backend +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: etcd-latency-distributions-of-fsync-called-by-wal + labels: + app: metric-expression + component: etcd + details: "false" + graph: disk-operate + level: cluster + source: rancher-monitoring +spec: + expression: sum(rate(etcd_disk_wal_fsync_duration_seconds_sum[1m])) + legendFormat: Fsync latency called by wal + description: The latency distributions of fsync called by wal +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: etcd-latency-distributions-of-fsync-called-by-wal-details + labels: + app: metric-expression + component: etcd + details: "true" + graph: disk-operate + level: cluster + source: rancher-monitoring +spec: + expression: sum(rate(etcd_disk_wal_fsync_duration_seconds_sum[1m])) by (instance) + legendFormat: Fsync latency called by wal([[instance]]) + description: The latency distributions of fsync called by wal +--- diff --git a/charts/rancher-monitoring/v0.0.1/charts/metric-expression-cluster/templates/expressionfluentd.yaml b/charts/rancher-monitoring/v0.0.1/charts/metric-expression-cluster/templates/expressionfluentd.yaml new file mode 100644 index 0000000000000000000000000000000000000000..748deb70afd7d8cbe46479c0a7ed34fafbb4b74a --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/metric-expression-cluster/templates/expressionfluentd.yaml @@ -0,0 +1,128 @@ +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: input-record-number + labels: + app: metric-expression + component: fluentd + details: "false" + level: cluster + metric: input-record + source: rancher-monitoring +spec: + expression: sum(rate(fluentd_input_status_num_records_total[5m])) + legendFormat: Input record number + description: 
Fluentd input status num records total +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: input-record-number-details + labels: + app: metric-expression + component: fluentd + details: "true" + level: cluster + metric: input-record + source: rancher-monitoring +spec: + expression: sum(rate(fluentd_input_status_num_records_total[5m])) by (instance) + legendFormat: Input record number([[instance]]) + description: Fluentd input status num records total +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: output-record-number + labels: + app: metric-expression + component: fluentd + details: "false" + level: cluster + metric: output-record + source: rancher-monitoring +spec: + expression: sum(rate(fluentd_output_status_num_records_total[5m])) + legendFormat: Output record number + description: Fluentd output status num records total +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: output-record-number-details + labels: + app: metric-expression + component: fluentd + details: "true" + level: cluster + metric: output-record + source: rancher-monitoring +spec: + expression: sum(rate(fluentd_output_status_num_records_total[5m])) by (instance) + legendFormat: Output record number([[instance]]) + description: Fluentd output status num records total +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: output-errors + labels: + app: metric-expression + component: fluentd + details: "false" + level: cluster + metric: output-errors + source: rancher-monitoring +spec: + expression: sum(rate(fluentd_output_status_num_errors[5m])) + legendFormat: Plugin Output errors + description: Fluentd output errors number +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: output-errors-details + labels: + app: metric-expression + component: fluentd + details: "true" + level: cluster + metric: output-errors + source: rancher-monitoring +spec: + 
expression: sum(rate(fluentd_output_status_num_errors[5m])) by (type) + legendFormat: Plugin([[type]]) + description: Fluentd output errors number +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: buffer-queue-length + labels: + app: metric-expression + component: fluentd + details: "false" + level: cluster + metric: buffer-queue-length + source: rancher-monitoring +spec: + expression: sum(rate(fluentd_output_status_buffer_queue_length[5m])) + legendFormat: Buffer queue + description: Fluentd Buffer queue length +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: buffer-queue-length-details + labels: + app: metric-expression + component: fluentd + details: "true" + level: cluster + metric: buffer-queue-length + source: rancher-monitoring +spec: + expression: sum(rate(fluentd_output_status_buffer_queue_length[5m])) by (instance) + legendFormat: '[[instance]]' + description: Fluentd Buffer queue length +--- diff --git a/charts/rancher-monitoring/v0.0.1/charts/metric-expression-cluster/templates/expressioningresscontroller.yaml b/charts/rancher-monitoring/v0.0.1/charts/metric-expression-cluster/templates/expressioningresscontroller.yaml new file mode 100644 index 0000000000000000000000000000000000000000..00a32ff6beebebd82a65e987b4a1c4d5d22463d7 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/metric-expression-cluster/templates/expressioningresscontroller.yaml @@ -0,0 +1,256 @@ +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: ingresscontroller-nginx-connection-reading + labels: + app: metric-expression + component: ingresscontroller + details: "false" + graph: nginx-connection + level: cluster + source: rancher-monitoring +spec: + expression: sum(nginx_connections{state="reading"}) + legendFormat: Reading connections + description: ingresscontroller nginx connection reading +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: 
ingresscontroller-nginx-connection-reading-details + labels: + app: metric-expression + component: ingresscontroller + details: "true" + graph: nginx-connection + level: cluster + source: rancher-monitoring +spec: + expression: sum(nginx_connections{state="reading"}) by (instance) + legendFormat: Reading connections([[instance]]) + description: ingresscontroller nginx connection reading +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: ingresscontroller-nginx-connection-waiting + labels: + app: metric-expression + component: ingresscontroller + details: "false" + graph: nginx-connection + level: cluster + source: rancher-monitoring +spec: + expression: sum(nginx_connections{state="waiting"}) + legendFormat: Nginx waiting connection + description: ingresscontroller nginx connection waiting +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: ingresscontroller-nginx-connection-waiting-details + labels: + app: metric-expression + component: ingresscontroller + details: "true" + graph: nginx-connection + level: cluster + source: rancher-monitoring +spec: + expression: sum(nginx_connections{state="waiting"}) by (instance) + legendFormat: Nginx waiting connection([[instance]]) + description: ingresscontroller nginx connection waiting +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: ingresscontroller-nginx-connection-writing + labels: + app: metric-expression + component: ingresscontroller + details: "false" + graph: nginx-connection + level: cluster + source: rancher-monitoring +spec: + expression: sum(nginx_connections{state="writing"}) + legendFormat: Writing connections + description: ingresscontroller nginx connection writing +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: ingresscontroller-nginx-connection-writing-details + labels: + app: metric-expression + component: ingresscontroller + details: "true" + graph: nginx-connection + level: cluster + source: 
rancher-monitoring +spec: + expression: sum(nginx_connections{state="writing"}) by (instance) + legendFormat: Writing connections([[instance]]) + description: ingresscontroller nginx connection writing +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: ingresscontroller-nginx-connection-accepted + labels: + app: metric-expression + component: ingresscontroller + details: "false" + graph: nginx-connection + level: cluster + source: rancher-monitoring +spec: + expression: sum(nginx_connections{state="accepted"}) + legendFormat: Accepted connections + description: ingresscontroller nginx connection accepted +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: ingresscontroller-nginx-connection-accepted-details + labels: + app: metric-expression + component: ingresscontroller + details: "true" + graph: nginx-connection + level: cluster + source: rancher-monitoring +spec: + expression: sum(nginx_connections{state="accepted"}) by (instance) + legendFormat: Accepted connections([[instance]]) + description: ingresscontroller nginx connection accepted +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: ingresscontroller-nginx-connection-active + labels: + app: metric-expression + component: ingresscontroller + details: "false" + graph: nginx-connection + level: cluster + source: rancher-monitoring +spec: + expression: sum(nginx_connections{state="active"}) + legendFormat: Active connections + description: ingresscontroller nginx connection active +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: ingresscontroller-nginx-connection-active-details + labels: + app: metric-expression + component: ingresscontroller + details: "true" + graph: nginx-connection + level: cluster + source: rancher-monitoring +spec: + expression: sum(nginx_connections{state="active"}) by (instance) + legendFormat: Active connections([[instance]]) + description: ingresscontroller nginx connection active +--- +kind: 
MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: ingresscontroller-nginx-connection-handled + labels: + app: metric-expression + component: ingresscontroller + details: "false" + graph: nginx-connection + level: cluster + source: rancher-monitoring +spec: + expression: sum(nginx_connections{state="handled"}) + legendFormat: Handled connections + description: ingresscontroller nginx connection handled +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: ingresscontroller-nginx-connection-handled-details + labels: + app: metric-expression + component: ingresscontroller + details: "true" + graph: nginx-connection + level: cluster + source: rancher-monitoring +spec: + expression: sum(nginx_connections{state="handled"}) by (instance) + legendFormat: Handled connections([[instance]]) + description: ingresscontroller nginx connection handled +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: ingresscontroller-upstream-response-seconds-by-host + labels: + app: metric-expression + component: ingresscontroller + details: "false" + level: cluster + metric: upstream-response-seconds + source: rancher-monitoring +spec: + expression: sort_desc(max(upstream_response_time_seconds_sum) by (host, path)) + legendFormat: Upstream response seconds(host:[[host]] path:[[path]]) + description: ingresscontroller nginx upstream response seconds by host +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: ingresscontroller-upstream-response-seconds-by-host-details + labels: + app: metric-expression + component: ingresscontroller + details: "true" + level: cluster + metric: upstream-response-seconds + source: rancher-monitoring +spec: + expression: sort_desc(max(upstream_response_time_seconds_sum) by (host, path)) + legendFormat: Upstream response seconds(host:[[host]] path:[[path]]) + description: ingresscontroller nginx upstream response seconds by host +--- +kind: MonitorMetric +apiVersion: 
management.cattle.io/v3 +metadata: + name: ingresscontroller-nginx-process-seconds-by-path + labels: + app: metric-expression + component: ingresscontroller + details: "false" + level: cluster + metric: request-process-seconds + source: rancher-monitoring +spec: + expression: max(request_duration_seconds_bucket{le="1"}) by (host, path) + legendFormat: Request duration(host:[[host]] path:[[path]]) + description: ingresscontroller nginx request duration by path +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: ingresscontroller-nginx-process-seconds-by-path-details + labels: + app: metric-expression + component: ingresscontroller + details: "true" + level: cluster + metric: request-process-seconds + source: rancher-monitoring +spec: + expression: max(request_duration_seconds_bucket{le="1"}) by (host, path) + legendFormat: Request duration(host:[[host]] path:[[path]]) + description: ingresscontroller nginx request duration by path +--- diff --git a/charts/rancher-monitoring/v0.0.1/charts/metric-expression-cluster/templates/expressionnode.yaml b/charts/rancher-monitoring/v0.0.1/charts/metric-expression-cluster/templates/expressionnode.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9f236adbd3823bd7035f59e574c653b418ce6da7 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/metric-expression-cluster/templates/expressionnode.yaml @@ -0,0 +1,572 @@ +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: node-network-transmit-bytes-sum-rate + labels: + app: metric-expression + component: node + details: "false" + graph: network-io + level: cluster + metric: network-transmit-bytes-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(node_network_transmit_bytes_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*",instance=~"$instance"}[5m])) + * 8 / 1024 + legendFormat: Transmit + description: node network transmit bytes sum rate +--- +kind: MonitorMetric +apiVersion: 
management.cattle.io/v3 +metadata: + name: node-network-transmit-bytes-sum-rate-details + labels: + app: metric-expression + component: node + details: "true" + graph: network-io + level: cluster + metric: network-transmit-bytes-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(node_network_transmit_bytes_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*",instance=~"$instance"}[5m])) + by (device) * 8 / 1024 + legendFormat: '[[device]]' + description: node network transmit bytes sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: node-network-receive-packets-dropped-sum-rate + labels: + app: metric-expression + component: node + details: "false" + graph: network-packet + level: cluster + metric: network-receive-packets-dropped-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(node_network_receive_drop_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*",instance=~"$instance"}[5m])) + + legendFormat: Receive packets + description: node network receive packets dropped sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: node-network-receive-packets-dropped-sum-rate-details + labels: + app: metric-expression + component: node + details: "true" + graph: network-packet + level: cluster + metric: network-receive-packets-dropped-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(node_network_receive_drop_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*",instance=~"$instance"}[5m])) + by (device) + legendFormat: Receive packets([[device]]) + description: node network receive packets dropped sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: node-network-transmit-packets-sum-rate + labels: + app: metric-expression + component: node + details: "false" + graph: network-packet + level: cluster + metric: network-transmit-packets-sum-rate + source: rancher-monitoring +spec: + expression: 
sum(rate(node_network_transmit_packets_total{instance=~"$instance"}[5m])) + + legendFormat: Transmit packets + description: node network transmit packets sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: node-network-transmit-packets-sum-rate-details + labels: + app: metric-expression + component: node + details: "true" + graph: network-packet + level: cluster + metric: network-transmit-packets-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(node_network_transmit_packets_total{instance=~"$instance"}[5m])) + by (device) + legendFormat: Transmit packets([[device]]) + description: node network transmit packets sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: node-disk-io-writes-bytes-sum-rate + labels: + app: metric-expression + component: node + details: "false" + graph: disk-io + level: cluster + metric: disk-io-writes-bytes-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(node_disk_written_bytes_total{instance=~"$instance"}[5m])) + * 8 / 1024 + legendFormat: Write + description: node disk io writes bytes sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: node-disk-io-writes-bytes-sum-rate-details + labels: + app: metric-expression + component: node + details: "true" + graph: disk-io + level: cluster + metric: disk-io-writes-bytes-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(node_disk_written_bytes_total{instance=~"$instance"}[5m])) + by (device) * 8 / 1024 + legendFormat: Write([[device]]) + description: node disk io writes bytes sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: node-disk-io-reads-bytes-sum-rate + labels: + app: metric-expression + component: node + details: "false" + graph: disk-io + level: cluster + metric: disk-io-reads-bytes-sum-rate + source: rancher-monitoring +spec: + expression: 
sum(rate(node_disk_read_bytes_total{instance=~"$instance"}[5m])) by + () * 8 / 1024 + legendFormat: Read + description: node disk io reads bytes sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: node-disk-io-reads-bytes-sum-rate-details + labels: + app: metric-expression + component: node + details: "true" + graph: disk-io + level: cluster + metric: disk-io-reads-bytes-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(node_disk_read_bytes_total{instance=~"$instance"}[5m])) by + (device) * 8 / 1024 + legendFormat: Read([[device]]) + description: node disk io reads bytes sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: node-fs-usage-percent + labels: + app: metric-expression + component: node + details: "false" + level: cluster + metric: fs-usage-percent + source: rancher-monitoring +spec: + expression: (sum(node_filesystem_size_bytes{device!="rootfs",instance=~"$instance"}) + - sum(node_filesystem_free_bytes{device!="rootfs",instance=~"$instance"}) + ) / sum(node_filesystem_size_bytes{device!="rootfs",instance=~"$instance"}) + + legendFormat: Disk usage + description: node fs usage percent +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: node-fs-usage-percent-details + labels: + app: metric-expression + component: node + details: "true" + level: cluster + metric: fs-usage-percent + source: rancher-monitoring +spec: + expression: (sum(node_filesystem_size_bytes{device!="rootfs",instance=~"$instance"}) + by (device) - sum(node_filesystem_free_bytes{device!="rootfs",instance=~"$instance"}) + by (device)) / sum(node_filesystem_size_bytes{device!="rootfs",instance=~"$instance"}) + by (device) + legendFormat: '[[device]]' + description: node fs usage percent +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: node-network-receive-packets-sum-rate + labels: + app: metric-expression + component: node + details: 
"false" + graph: network-packet + level: cluster + metric: network-receive-packets-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(node_network_receive_packets_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*",instance=~"$instance"}[5m])) + + legendFormat: Receive packets + description: node network receive packets sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: node-network-receive-packets-sum-rate-details + labels: + app: metric-expression + component: node + details: "true" + graph: network-packet + level: cluster + metric: network-receive-packets-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(node_network_receive_packets_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*",instance=~"$instance"}[5m])) + by (device) + legendFormat: Receive packets([[device]]) + description: node network receive packets sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: node-network-transmit-errors-sum-rate + labels: + app: metric-expression + component: node + details: "false" + graph: network-packet + level: cluster + metric: network-transmit-errors-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(node_network_transmit_errs_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*",instance=~"$instance"}[5m])) + + legendFormat: Transmit errors + description: node network transmit errors sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: node-network-transmit-errors-sum-rate-details + labels: + app: metric-expression + component: node + details: "true" + graph: network-packet + level: cluster + metric: network-transmit-errors-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(node_network_transmit_errs_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*",instance=~"$instance"}[5m])) + by (device) + legendFormat: Transmit errors([[device]]) + description: node 
network transmit errors sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: node-cpu-load-1 + labels: + app: metric-expression + component: node + details: "false" + graph: cpu-load + level: cluster + metric: cpu-load-1 + source: rancher-monitoring +spec: + expression: sum(node_load1{instance=~"$instance"}) / count(node_cpu_seconds_total{mode="system",instance=~"$instance"}) + + legendFormat: Load1 + description: node cpu load 1 +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: node-cpu-load-5-details + labels: + app: metric-expression + component: node + details: "true" + graph: cpu-load + level: cluster + metric: cpu-load-5 + source: rancher-monitoring +spec: + expression: sum(node_load5{instance=~"$instance"}) / count(node_cpu_seconds_total{mode="system",instance=~"$instance"}) + + legendFormat: Load5 + description: node cpu load 5 +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: node-cpu-load-15 + labels: + app: metric-expression + component: node + details: "false" + graph: cpu-load + level: cluster + metric: cpu-load-15 + source: rancher-monitoring +spec: + expression: sum(node_load15{instance=~"$instance"}) / count(node_cpu_seconds_total{mode="system",instance=~"$instance"}) + + legendFormat: Load15 + description: node cpu load 15 +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: node-cpu-load-15-details + labels: + app: metric-expression + component: node + details: "true" + graph: cpu-load + level: cluster + metric: cpu-load-15 + source: rancher-monitoring +spec: + expression: sum(node_load15{instance=~"$instance"}) / count(node_cpu_seconds_total{mode="system",instance=~"$instance"}) + + legendFormat: Load15 + description: node cpu load 15 +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: node-cpu-usage-seconds-sum-rate + labels: + app: metric-expression + component: node + details: "false" 
+ level: cluster + metric: cpu-usage-seconds-sum-rate + source: rancher-monitoring +spec: + expression: 1 - (avg(irate(node_cpu_seconds_total{mode="idle", instance=~"$instance"}[5m])) by (instance)) + legendFormat: CPU + description: node cpu usage seconds sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: node-cpu-usage-seconds-sum-rate-details + labels: + app: metric-expression + component: node + details: "true" + level: cluster + metric: cpu-usage-seconds-sum-rate + source: rancher-monitoring +spec: + expression: avg(irate(node_cpu_seconds_total{instance=~"$instance"}[5m]))by (mode) + legendFormat: '[[mode]]' + description: node cpu usage seconds sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: node-memory-usage-percent + labels: + app: metric-expression + component: node + details: "false" + level: cluster + metric: memory-usage-percent + source: rancher-monitoring +spec: + expression: 1 - sum(node_memory_MemAvailable_bytes{instance=~"$instance"}) + / sum(node_memory_MemTotal_bytes{instance=~"$instance"}) + legendFormat: Memory usage + description: node memory usage percent +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: node-memory-usage-percent-details + labels: + app: metric-expression + component: node + details: "true" + level: cluster + metric: memory-usage-percent + source: rancher-monitoring +spec: + expression: 1 - sum(node_memory_MemAvailable_bytes{instance=~"$instance"}) + / sum(node_memory_MemTotal_bytes{instance=~"$instance"}) + legendFormat: Memory usage + description: node memory usage percent +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: node-network-receive-bytes-sum-rate + labels: + app: metric-expression + component: node + details: "false" + graph: network-io + level: cluster + metric: network-receive-bytes-sum-rate + source: rancher-monitoring +spec: + expression: 
sum(rate(node_network_receive_bytes_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*",instance=~"$instance"}[5m])) + * 8 / 1024 + legendFormat: Receive + description: node network receive bytes sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: node-network-receive-bytes-sum-rate-details + labels: + app: metric-expression + component: node + details: "true" + graph: network-io + level: cluster + metric: network-receive-bytes-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(node_network_receive_bytes_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*",instance=~"$instance"}[5m])) + by (device) * 8 / 1024 + legendFormat: Receive([[device]]) + description: node network receive bytes sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: node-network-receive-errors-sum-rate + labels: + app: metric-expression + component: node + details: "false" + graph: network-packet + level: cluster + metric: network-receive-errors-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(node_network_receive_errs_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*",instance=~"$instance"}[5m])) + + legendFormat: Receive packets + description: node network receive errors sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: node-network-receive-errors-sum-rate-details + labels: + app: metric-expression + component: node + details: "true" + graph: network-packet + level: cluster + metric: network-receive-errors-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(node_network_receive_errs_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*",instance=~"$instance"}[5m])) + by (device) + legendFormat: Receive packets([[device]]) + description: node network receive errors sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: node-cpu-load-5 + labels: + app: metric-expression 
+ component: node + details: "false" + graph: cpu-load + level: cluster + metric: cpu-load-5 + source: rancher-monitoring +spec: + expression: sum(node_load5{instance=~"$instance"}) / count(node_cpu_seconds_total{mode="system",instance=~"$instance"}) + + legendFormat: Load5 + description: node cpu load 5 +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: node-cpu-load-1-details + labels: + app: metric-expression + component: node + details: "true" + graph: cpu-load + level: cluster + metric: cpu-load-1 + source: rancher-monitoring +spec: + expression: sum(node_load1{instance=~"$instance"}) / count(node_cpu_seconds_total{mode="system",instance=~"$instance"}) + + legendFormat: Load1 + description: node cpu load 1 +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: node-network-transmit-packets-dropped-sum-rate + labels: + app: metric-expression + component: node + details: "false" + graph: network-packet + level: cluster + metric: network-transmit-packets-dropped-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(node_network_transmit_drop_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*",instance=~"$instance"}[5m])) + + legendFormat: Transmit dropped + description: node network transmit packets dropped sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: node-network-transmit-packets-dropped-sum-rate-details + labels: + app: metric-expression + component: node + details: "true" + graph: network-packet + level: cluster + metric: network-transmit-packets-dropped-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(node_network_transmit_drop_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*",instance=~"$instance"}[5m])) + by (device) + legendFormat: Transmit dropped([[device]]) + description: node network transmit packets dropped sum rate +--- diff --git
a/charts/rancher-monitoring/v0.0.1/charts/metric-expression-cluster/templates/expressionpod.yaml b/charts/rancher-monitoring/v0.0.1/charts/metric-expression-cluster/templates/expressionpod.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0339163a002acc0e1afbe17dd455767abf5ce9d7 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/metric-expression-cluster/templates/expressionpod.yaml @@ -0,0 +1,600 @@ +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: pod-cpu-cfs-throttled-seconds-sum-rate + labels: + app: metric-expression + component: pod + details: "false" + graph: container-cpu-usage-details + level: project + source: rancher-monitoring +spec: + expression: sum(rate(container_cpu_cfs_throttled_seconds_total{namespace=~"$namespace",pod_name=~"$podName", + container_name!=""}[5m])) + legendFormat: CPU cfs throttled + description: pod cpu cfs throttled seconds sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: pod-cpu-cfs-throttled-seconds-sum-rate-details + labels: + app: metric-expression + component: pod + details: "true" + graph: container-cpu-usage-details + level: project + source: rancher-monitoring +spec: + expression: sum(rate(container_cpu_cfs_throttled_seconds_total{namespace=~"$namespace",pod_name=~"$podName", + container_name!=""}[5m])) by (container_name) + legendFormat: CPU cfs throttled([[container_name]]) + description: pod cpu cfs throttled seconds sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: pod-memory-usage-percent + labels: + app: metric-expression + component: pod + details: "false" + level: project + metric: memory-usage-percent + source: rancher-monitoring +spec: + expression: sum(container_memory_working_set_bytes{namespace=~"$namespace", pod_name=~"$podName", + container_name!=""}) / sum(label_join(kube_pod_container_resource_limits_memory_bytes{namespace=~"$namespace", + 
pod=~"$InstanceName"},"pod_name", "", "pod")) + legendFormat: Memory + description: pod memory usage percent +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: pod-memory-usage-percent-details + labels: + app: metric-expression + component: pod + details: "true" + level: project + metric: memory-usage-percent + source: rancher-monitoring +spec: + expression: sum(container_memory_working_set_bytes{namespace=~"$namespace", pod_name=~"$podName", + container_name!=""}) by (container_name) / sum(label_join(kube_pod_container_resource_limits_memory_bytes{namespace=~"$namespace", + pod=~"$InstanceName"},"pod_name", "", "pod")) by (container_name) + legendFormat: Memory([[container_name]]) + description: pod memory usage percent +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: pod-fs-bytes-sum + labels: + app: metric-expression + component: pod + details: "false" + level: project + metric: fs-bytes-sum + source: rancher-monitoring +spec: + expression: sum(container_fs_usage_bytes{namespace=~"$namespace", pod_name=~"$podName", + container_name!=""}) + legendFormat: Filesystem usage + description: pod fs bytes sum +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: pod-fs-bytes-sum-details + labels: + app: metric-expression + component: pod + details: "true" + level: project + metric: fs-bytes-sum + source: rancher-monitoring +spec: + expression: sum(container_fs_usage_bytes{namespace=~"$namespace", pod_name=~"$podName", + container_name!=""}) by (container_name) + legendFormat: Filesystem usage([[container_name]]) + description: pod fs bytes sum +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: pod-network-receive-packets-sum-rate + labels: + app: metric-expression + component: pod + details: "false" + graph: network-packet + level: project + metric: network-receive-packets-sum-rate + source: rancher-monitoring +spec: + expression: 
sum(rate(container_network_receive_packets_total{namespace=~"$namespace",pod_name=~"$podName", + container_name!=""}[5m])) + legendFormat: Receive packets + description: pod network receive packets sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: pod-network-receive-packets-sum-rate-details + labels: + app: metric-expression + component: pod + details: "true" + graph: network-packet + level: project + metric: network-receive-packets-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(container_network_receive_packets_total{namespace=~"$namespace",pod_name=~"$podName", + container_name!=""}[5m])) by (container_name) + legendFormat: Receive packets([[container_name]]) + description: pod network receive packets sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: pod-network-transmit-packets-sum-rate + labels: + app: metric-expression + component: pod + details: "false" + graph: network-packet + level: project + metric: network-transmit-packets-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(container_network_transmit_packets_total{namespace=~"$namespace",pod_name=~"$podName", + container_name!=""}[5m])) + legendFormat: Transmit packets + description: pod network transmit packets sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: pod-network-transmit-packets-sum-rate-details + labels: + app: metric-expression + component: pod + details: "true" + graph: network-packet + level: project + metric: network-transmit-packets-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(container_network_transmit_packets_total{namespace=~"$namespace",pod_name=~"$podName", + container_name!=""}[5m])) by (container_name) + legendFormat: Transmit packets([[container_name]]) + description: pod network transmit packets sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: 
pod-cpu-user-seconds-sum-rate + labels: + app: metric-expression + component: pod + details: "false" + graph: container-cpu-usage-details + level: project + source: rancher-monitoring +spec: + expression: sum(rate(container_cpu_user_seconds_total{namespace=~"$namespace",pod_name=~"$podName", + container_name!=""}[5m])) + legendFormat: CPU user seconds + description: pod cpu user seconds sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: pod-cpu-user-seconds-sum-rate-details + labels: + app: metric-expression + component: pod + details: "true" + graph: container-cpu-usage-details + level: project + source: rancher-monitoring +spec: + expression: sum(rate(container_cpu_user_seconds_total{namespace=~"$namespace",pod_name=~"$podName", + container_name!=""}[5m])) by (container_name) + legendFormat: CPU user seconds([[container_name]]) + description: pod cpu user seconds sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: pod-disk-io-reads-bytes-sum-rate + labels: + app: metric-expression + component: pod + details: "false" + graph: disk-io + level: project + metric: disk-io-reads-bytes-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(container_fs_reads_bytes_total{namespace=~"$namespace",pod_name=~"$podName", + container_name!=""}[5m])) * 8 / 1024 + legendFormat: Read + description: pod disk io reads bytes sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: pod-disk-io-reads-bytes-sum-rate-details + labels: + app: metric-expression + component: pod + details: "true" + graph: disk-io + level: project + metric: disk-io-reads-bytes-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(container_fs_reads_bytes_total{namespace=~"$namespace",pod_name=~"$podName", + container_name!=""}[5m])) by (container_name) * 8 / 1024 + legendFormat: Read([[container_name]]) + description: pod disk io reads bytes sum rate +--- +kind: 
MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: pod-network-receive-bytes-sum-rate + labels: + app: metric-expression + component: pod + details: "false" + graph: network-io + level: project + metric: network-receive-bytes-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(container_network_receive_bytes_total{namespace=~"$namespace",pod_name=~"$podName", + container_name!=""}[5m])) * 8 / 1024 + legendFormat: Receive + description: pod network receive bytes sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: pod-network-receive-bytes-sum-rate-details + labels: + app: metric-expression + component: pod + details: "true" + graph: network-io + level: project + metric: network-receive-bytes-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(container_network_receive_bytes_total{namespace=~"$namespace",pod_name=~"$podName", + container_name!=""}[5m])) by (container_name) * 8 / 1024 + legendFormat: Receive([[container_name]]) + description: pod network receive bytes sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: pod-network-transmit-bytes-sum-rate + labels: + app: metric-expression + component: pod + details: "false" + graph: network-io + level: project + metric: network-transmit-bytes-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(container_network_transmit_bytes_total{namespace=~"$namespace",pod_name=~"$podName", + container_name!=""}[5m])) * 8 / 1024 + legendFormat: Transmit + description: pod network transmit bytes sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: pod-network-transmit-bytes-sum-rate-details + labels: + app: metric-expression + component: pod + details: "true" + graph: network-io + level: project + metric: network-transmit-bytes-sum-rate + source: rancher-monitoring +spec: + expression:
sum(rate(container_network_transmit_bytes_total{namespace=~"$namespace",pod_name=~"$podName", + container_name!=""}[5m])) by (container_name) * 8 / 1024 + legendFormat: Transmit([[container_name]]) + description: pod network transmit bytes sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: pod-network-receive-packets-dropped-sum-rate + labels: + app: metric-expression + component: pod + details: "false" + graph: network-packet + level: project + metric: network-receive-packets-dropped-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(container_network_receive_packets_dropped_total{namespace=~"$namespace",pod_name=~"$podName", + container_name!=""}[5m])) + legendFormat: Receive dropped + description: pod network receive packets dropped sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: pod-network-receive-packets-dropped-sum-rate-details + labels: + app: metric-expression + component: pod + details: "true" + graph: network-packet + level: project + metric: network-receive-packets-dropped-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(container_network_receive_packets_dropped_total{namespace=~"$namespace",pod_name=~"$podName", + container_name!=""}[5m])) by (container_name) + legendFormat: Receive dropped([[container_name]]) + description: pod network receive packets dropped sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: pod-memory-usage-bytes-sum + labels: + app: metric-expression + component: pod + details: "false" + level: project + metric: memory-usage-bytes-sum + source: rancher-monitoring +spec: + expression: sum(container_memory_working_set_bytes{name!~"POD", namespace=~"$namespace",pod_name=~"$podName", + container_name!=""}) + legendFormat: Memory usage + description: pod memory usage bytes sum +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: pod-memory-usage-bytes-sum-details + labels: + app:
metric-expression + component: pod + details: "true" + level: project + metric: memory-usage-bytes-sum + source: rancher-monitoring +spec: + expression: sum(container_memory_working_set_bytes{name!~"POD", namespace=~"$namespace",pod_name=~"$podName", + container_name!=""}) by (container_name) + legendFormat: Memory usage([[container_name]]) + description: pod memory usage bytes sum +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: pod-disk-io-writes-bytes-sum-rate + labels: + app: metric-expression + component: pod + details: "false" + graph: disk-io + level: project + metric: disk-io-writes-bytes-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(container_fs_writes_bytes_total{namespace=~"$namespace",pod_name=~"$podName", + container_name!=""}[5m])) * 8 / 1024 + legendFormat: Write + description: pod disk io writes bytes sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: pod-disk-io-writes-bytes-sum-rate-details + labels: + app: metric-expression + component: pod + details: "true" + graph: disk-io + level: project + metric: disk-io-writes-bytes-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(container_fs_writes_bytes_total{namespace=~"$namespace",pod_name=~"$podName", + container_name!=""}[5m])) by (container_name) * 8 / 1024 + legendFormat: Write([[container_name]]) + description: pod disk io writes bytes sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: pod-network-receive-errors-sum-rate + labels: + app: metric-expression + component: pod + details: "false" + graph: network-packet + level: project + metric: network-receive-errors-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(container_network_receive_errors_total{namespace=~"$namespace",pod_name=~"$podName", + container_name!=""}[5m])) + legendFormat: Receive errors + description: pod network receive errors sum rate +--- +kind: MonitorMetric 
+apiVersion: management.cattle.io/v3 +metadata: + name: pod-network-receive-errors-sum-rate-details + labels: + app: metric-expression + component: pod + details: "true" + graph: network-packet + level: project + metric: network-receive-errors-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(container_network_receive_errors_total{namespace=~"$namespace",pod_name=~"$podName", + container_name!=""}[5m])) by (container_name) + legendFormat: Receive errors([[container_name]]) + description: pod network receive errors sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: pod-cpu-usage-seconds-sum-rate + labels: + app: metric-expression + component: pod + details: "false" + graph: container-cpu-usage + level: project + source: rancher-monitoring +spec: + expression: sum(rate(container_cpu_usage_seconds_total{namespace=~"$namespace",pod_name=~"$podName", + container_name!=""}[5m])) + legendFormat: CPU usage + description: pod CPU usage sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: pod-cpu-usage-seconds-sum-rate-details + labels: + app: metric-expression + component: pod + details: "true" + graph: container-cpu-usage + level: project + source: rancher-monitoring +spec: + expression: sum(rate(container_cpu_usage_seconds_total{namespace=~"$namespace",pod_name=~"$podName", + container_name!=""}[5m])) by (container_name) + legendFormat: CPU usage([[container_name]]) + description: pod CPU usage sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: pod-network-transmit-errors-sum-rate + labels: + app: metric-expression + component: pod + details: "false" + graph: network-packet + level: project + metric: network-transmit-errors-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(container_network_transmit_errors_total{namespace=~"$namespace",pod_name=~"$podName", + container_name!=""}[5m])) + legendFormat: Transmit errors + description: pod
network transmit errors sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: pod-network-transmit-errors-sum-rate-details + labels: + app: metric-expression + component: pod + details: "true" + graph: network-packet + level: project + metric: network-transmit-errors-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(container_network_transmit_errors_total{namespace=~"$namespace",pod_name=~"$podName", + container_name!=""}[5m])) by (container_name) + legendFormat: Transmit errors([[container_name]]) + description: pod network transmit errors sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: pod-network-transmit-packets-dropped-sum-rate + labels: + app: metric-expression + component: pod + details: "false" + graph: network-packet + level: project + metric: network-transmit-packets-dropped-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(container_network_transmit_packets_dropped_total{namespace=~"$namespace",pod_name=~"$podName", + container_name!=""}[5m])) + legendFormat: Transmit dropped + description: pod network transmit packets dropped sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: pod-network-transmit-packets-dropped-sum-rate-details + labels: + app: metric-expression + component: pod + details: "true" + graph: network-packet + level: project + metric: network-transmit-packets-dropped-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(container_network_transmit_packets_dropped_total{namespace=~"$namespace",pod_name=~"$podName", + container_name!=""}[5m])) by (container_name) + legendFormat: Transmit dropped([[container_name]]) + description: pod network transmit packets dropped sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: pod-cpu-system-seconds-sum-rate + labels: + app: metric-expression + component: pod + details: "false" + graph: container-cpu-usage-details + level: project +
source: rancher-monitoring +spec: + expression: sum(rate(container_cpu_system_seconds_total{namespace=~"$namespace",pod_name=~"$podName", + container_name!=""}[5m])) + legendFormat: CPU system seconds + description: pod cpu system seconds sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: pod-cpu-system-seconds-sum-rate-details + labels: + app: metric-expression + component: pod + details: "true" + graph: container-cpu-usage-details + level: project + source: rancher-monitoring +spec: + expression: sum(rate(container_cpu_system_seconds_total{namespace=~"$namespace",pod_name=~"$podName", + container_name!=""}[5m])) by (container_name) + legendFormat: CPU system seconds([[container_name]]) + description: pod cpu system seconds sum rate +--- diff --git a/charts/rancher-monitoring/v0.0.1/charts/metric-expression-cluster/templates/expressionscheduler.yaml b/charts/rancher-monitoring/v0.0.1/charts/metric-expression-cluster/templates/expressionscheduler.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5ff916394dfe891fefb1f065d08064cab97896ef --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/metric-expression-cluster/templates/expressionscheduler.yaml @@ -0,0 +1,98 @@ +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: scheduler-e-2-e-scheduling-latency-seconds-quantile + labels: + app: metric-expression + component: scheduler + details: "false" + level: cluster + metric: e-2-e-scheduling-latency-seconds-quantile + source: rancher-monitoring +spec: + expression: histogram_quantile(0.99, sum(scheduler_e2e_scheduling_latency_microseconds_bucket) + by (le, instance)) / 1e+06 + legendFormat: E2E latency + description: scheduler e 2 e scheduling latency seconds quantile +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: scheduler-e-2-e-scheduling-latency-seconds-quantile-details + labels: + app: metric-expression + component: scheduler + details: 
"true" + level: cluster + metric: e-2-e-scheduling-latency-seconds-quantile + source: rancher-monitoring +spec: + expression: histogram_quantile(0.99, sum(scheduler_e2e_scheduling_latency_microseconds_bucket) + by (le, instance)) / 1e+06 + legendFormat: E2E latency([[instance]]) + description: scheduler e 2 e scheduling latency seconds quantile +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: scheduler-total-preemption-attempts + labels: + app: metric-expression + component: scheduler + details: "false" + level: cluster + metric: total-preemption-attempts + source: rancher-monitoring +spec: + expression: sum(rate(scheduler_total_preemption_attempts[5m])) + legendFormat: Preemption attempts + description: scheduler total preemption attempts +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: scheduler-total-preemption-attempts-details + labels: + app: metric-expression + component: scheduler + details: "true" + level: cluster + metric: total-preemption-attempts + source: rancher-monitoring +spec: + expression: sum(rate(scheduler_total_preemption_attempts[5m])) by (instance) + legendFormat: Preemption attempts([[instance]]) + description: scheduler total preemption attempts +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: scheduler-pod-unscheduler + labels: + app: metric-expression + component: scheduler + details: "false" + level: cluster + metric: pod-unscheduler + source: rancher-monitoring +spec: + expression: sum(kube_pod_status_scheduled{condition="false"}) + legendFormat: Scheduling failed pods + description: pod unscheduler +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: scheduler-pod-unscheduler-details + labels: + app: metric-expression + component: scheduler + details: "true" + level: cluster + metric: pod-unscheduler + source: rancher-monitoring +spec: + expression:
sum(kube_pod_status_scheduled{condition="false"}) + legendFormat: Scheduling failed pods + description: pod unscheduler +--- diff --git a/charts/rancher-monitoring/v0.0.1/charts/metric-expression-cluster/templates/expressionworkload.yaml b/charts/rancher-monitoring/v0.0.1/charts/metric-expression-cluster/templates/expressionworkload.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b484df66a1242d8b7a376c7e80162d3ae370f59b --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/metric-expression-cluster/templates/expressionworkload.yaml @@ -0,0 +1,606 @@ +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: workload-disk-io-writes-bytes-sum-rate + labels: + app: metric-expression + component: workload + details: "false" + graph: disk-io + level: project + metric: disk-io-writes-bytes-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(container_fs_writes_bytes_total{namespace=~"$namespace",pod_name=~"$podName", + container_name!=""}[5m])) * 8 / 1024 + legendFormat: Write + description: workload disk io writes bytes sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: workload-disk-io-writes-bytes-sum-rate-details + labels: + app: metric-expression + component: workload + details: "true" + graph: disk-io + level: project + metric: disk-io-writes-bytes-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(container_fs_writes_bytes_total{namespace=~"$namespace",pod_name=~"$podName", + container_name!=""}[5m])) by (pod_name) * 8 / 1024 + legendFormat: Write([[pod_name]]) + description: workload disk io writes bytes sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: workload-disk-io-reads-bytes-sum-rate + labels: + app: metric-expression + component: workload + details: "false" + graph: disk-io + level: project + metric: disk-io-reads-bytes-sum-rate + source: rancher-monitoring +spec: + expression: 
sum(rate(container_fs_reads_bytes_total{namespace=~"$namespace",pod_name=~"$podName", + container_name!=""}[5m])) * 8 / 1024 + legendFormat: Read + description: workload disk io reads bytes sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: workload-disk-io-reads-bytes-sum-rate-details + labels: + app: metric-expression + component: workload + details: "true" + graph: disk-io + level: project + metric: disk-io-reads-bytes-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(container_fs_reads_bytes_total{namespace=~"$namespace",pod_name=~"$podName", + container_name!=""}[5m])) by (pod_name) * 8 / 1024 + legendFormat: Read([[pod_name]]) + description: workload disk io reads bytes sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: workload-fs-bytes-sum + labels: + app: metric-expression + component: workload + details: "false" + level: project + metric: fs-bytes-sum + source: rancher-monitoring +spec: + expression: sum(container_fs_usage_bytes{namespace=~"$namespace", pod_name=~"$podName", + container_name!=""}) + legendFormat: File usage + description: workload fs bytes sum +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: workload-fs-bytes-sum-details + labels: + app: metric-expression + component: workload + details: "true" + level: project + metric: fs-bytes-sum + source: rancher-monitoring +spec: + expression: sum(container_fs_usage_bytes{namespace=~"$namespace", pod_name=~"$podName", + container_name!=""}) by (pod_name) + legendFormat: File usage([[pod_name]]) + description: workload fs bytes sum +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: workload-network-transmit-packets-sum-rate + labels: + app: metric-expression + component: workload + details: "false" + graph: network-packet + level: project + metric: network-transmit-packets-sum-rate + source: rancher-monitoring +spec: + expression:
sum(rate(container_network_transmit_packets_total{namespace=~"$namespace",pod_name=~"$podName", + container_name!=""}[5m])) + legendFormat: Transmit packets + description: workload network transmit packets sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: workload-network-transmit-packets-sum-rate-details + labels: + app: metric-expression + component: workload + details: "true" + graph: network-packet + level: project + metric: network-transmit-packets-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(container_network_transmit_packets_total{namespace=~"$namespace",pod_name=~"$podName", + container_name!=""}[5m])) by (pod_name) + legendFormat: Transmit packets([[pod_name]]) + description: workload network transmit packets sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: workload-network-receive-packets-dropped-sum-rate + labels: + app: metric-expression + component: workload + details: "false" + graph: network-packet + level: project + metric: network-receive-packets-dropped-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(container_network_receive_packets_dropped_total{namespace=~"$namespace",pod_name=~"$podName", + container_name!=""}[5m])) + legendFormat: Receive dropped + description: workload network receive packets dropped sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: workload-network-receive-packets-dropped-sum-rate-details + labels: + app: metric-expression + component: workload + details: "true" + graph: network-packet + level: project + metric: network-receive-packets-dropped-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(container_network_receive_packets_dropped_total{namespace=~"$namespace",pod_name=~"$podName", + container_name!=""}[5m])) by (pod_name) + legendFormat: Receive dropped([[pod_name]]) + description: workload network receive packets dropped sum rate +--- 
+kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: workload-cpu-usage-seconds-sum-rate + labels: + app: metric-expression + component: workload + details: "false" + graph: container-cpu-usage + level: project + metric: cpu-usage-seconds-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(container_cpu_usage_seconds_total{namespace=~"$namespace",pod_name=~"$podName", + container_name!=""}[5m])) + legendFormat: CPU usage + description: workload cpu usage seconds sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: workload-cpu-usage-seconds-sum-rate-details + labels: + app: metric-expression + component: workload + details: "true" + graph: container-cpu-usage + level: project + metric: cpu-usage-seconds-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(container_cpu_usage_seconds_total{namespace=~"$namespace",pod_name=~"$podName", + container_name!=""}[5m])) by (pod_name) + legendFormat: CPU usage([[pod_name]]) + description: workload cpu usage seconds sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: workload-cpu-system-seconds-sum-rate + labels: + app: metric-expression + component: workload + details: "false" + graph: container-cpu-usage-details + level: project + metric: cpu-system-seconds-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(container_cpu_system_seconds_total{namespace=~"$namespace",pod_name=~"$podName", + container_name!=""}[5m])) + legendFormat: CPU system seconds + description: workload cpu system seconds sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: workload-cpu-system-seconds-sum-rate-details + labels: + app: metric-expression + component: workload + details: "true" + graph: container-cpu-usage-details + level: project + metric: cpu-system-seconds-sum-rate + source: rancher-monitoring +spec: + expression: 
sum(rate(container_cpu_system_seconds_total{namespace=~"$namespace",pod_name=~"$podName", + container_name!=""}[5m])) by (pod_name) + legendFormat: CPU system seconds([[pod_name]]) + description: workload cpu system seconds sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: workload-network-receive-bytes-sum-rate + labels: + app: metric-expression + component: workload + details: "false" + graph: network-io + level: project + metric: network-receive-bytes-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(container_network_receive_bytes_total{namespace=~"$namespace",pod_name=~"$podName", + container_name!=""}[5m])) * 8 / 1024 + legendFormat: Receive + description: workload network receive bytes sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: workload-network-receive-bytes-sum-rate-details + labels: + app: metric-expression + component: workload + details: "true" + graph: network-io + level: project + metric: network-receive-bytes-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(container_network_receive_bytes_total{namespace=~"$namespace",pod_name=~"$podName", + container_name!=""}[5m])) by (pod_name) * 8 / 1024 + legendFormat: Receive([[pod_name]]) + description: workload network receive bytes sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: workload-network-receive-errors-sum-rate + labels: + app: metric-expression + component: workload + details: "false" + graph: network-packet + level: project + metric: network-receive-errors-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(container_network_receive_errors_total{namespace=~"$namespace",pod_name=~"$podName", + container_name!=""}[5m])) + legendFormat: Receive errors + description: workload network receive errors sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: 
workload-network-receive-errors-sum-rate-details + labels: + app: metric-expression + component: workload + details: "true" + graph: network-packet + level: project + metric: network-receive-errors-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(container_network_receive_errors_total{namespace=~"$namespace",pod_name=~"$podName", + container_name!=""}[5m])) by (pod_name) + legendFormat: Receive errors([[pod_name]]) + description: workload network receive errors sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: workload-network-receive-packets-sum-rate + labels: + app: metric-expression + component: workload + details: "false" + graph: network-packet + level: project + metric: network-receive-packets-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(container_network_receive_packets_total{namespace=~"$namespace",pod_name=~"$podName", + container_name!=""}[5m])) + legendFormat: Receive packets + description: workload network receive packets sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: workload-network-receive-packets-sum-rate-details + labels: + app: metric-expression + component: workload + details: "true" + graph: network-packet + level: project + metric: network-receive-packets-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(container_network_receive_packets_total{namespace=~"$namespace",pod_name=~"$podName", + container_name!=""}[5m])) by (pod_name) + legendFormat: Receive packets([[pod_name]]) + description: workload network receive packets sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: workload-memory-usage-bytes-sum + labels: + app: metric-expression + component: workload + details: "false" + level: project + metric: memory-usage-bytes-sum + source: rancher-monitoring +spec: + expression: sum(container_memory_working_set_bytes{name!~"POD", 
namespace=~"$namespace",pod_name=~"$podName", + container_name!=""}) + legendFormat: Memory + description: workload memory usage bytes sum +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: workload-memory-usage-bytes-sum-details + labels: + app: metric-expression + component: workload + details: "true" + level: project + metric: memory-usage-bytes-sum + source: rancher-monitoring +spec: + expression: sum(container_memory_working_set_bytes{name!~"POD", namespace=~"$namespace",pod_name=~"$podName", + container_name!=""}) by (pod_name) + legendFormat: '[[pod_name]]' + description: workload memory usage bytes sum +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: workload-memory-usage-percent + labels: + app: metric-expression + component: workload + details: "false" + level: project + metric: memory-usage-percent + source: rancher-monitoring +spec: + expression: sum(container_memory_working_set_bytes{namespace=~"$namespace", pod_name=~"$podName", + container_name!=""}) / sum(label_join(kube_pod_container_resource_limits_memory_bytes{namespace=~"$namespace", + pod=~"$InstanceName"},"pod_name", "", "pod")) + legendFormat: Usage percent + description: workload memory usage percent +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: workload-memory-usage-percent-details + labels: + app: metric-expression + component: workload + details: "true" + level: project + metric: memory-usage-percent + source: rancher-monitoring +spec: + expression: sum(container_memory_working_set_bytes{namespace=~"$namespace", pod_name=~"$podName", + container_name!=""}) by (pod_name) / sum(label_join(kube_pod_container_resource_limits_memory_bytes{namespace=~"$namespace", + pod=~"$InstanceName"},"pod_name", "", "pod")) by (pod_name) + legendFormat: Usage percent([[pod_name]]) + description: workload memory usage percent +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: 
workload-network-transmit-bytes-sum-rate + labels: + app: metric-expression + component: workload + details: "false" + graph: network-io + level: project + metric: network-transmit-bytes-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(container_network_transmit_bytes_total{namespace=~"$namespace",pod_name=~"$podName", + container_name!=""}[5m])) * 8 / 1024 + legendFormat: Transmit + description: workload network transmit bytes sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: workload-network-transmit-bytes-sum-rate-details + labels: + app: metric-expression + component: workload + details: "true" + graph: network-io + level: project + metric: network-transmit-bytes-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(container_network_transmit_bytes_total{namespace=~"$namespace",pod_name=~"$podName", + container_name!=""}[5m])) by (pod_name) * 8 / 1024 + legendFormat: Transmit([[pod_name]]) + description: workload network transmit bytes sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: workload-network-transmit-errors-sum-rate + labels: + app: metric-expression + component: workload + details: "false" + graph: network-packet + level: project + metric: network-transmit-errors-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(container_network_transmit_errors_total{namespace=~"$namespace",pod_name=~"$podName", + container_name!=""}[5m])) + legendFormat: Transmit errors + description: workload network transmit errors sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: workload-network-transmit-errors-sum-rate-details + labels: + app: metric-expression + component: workload + details: "true" + graph: network-packet + level: project + metric: network-transmit-errors-sum-rate + source: rancher-monitoring +spec: + expression: 
sum(rate(container_network_transmit_errors_total{namespace=~"$namespace",pod_name=~"$podName", + container_name!=""}[5m])) by (pod_name) + legendFormat: Transmit errors([[pod_name]]) + description: workload network transmit errors sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: workload-network-transmit-packets-dropped-sum-rate + labels: + app: metric-expression + component: workload + details: "false" + graph: network-packet + level: project + metric: network-transmit-packets-dropped-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(container_network_transmit_packets_dropped_total{namespace=~"$namespace",pod_name=~"$podName", + container_name!=""}[5m])) + legendFormat: Transmit dropped + description: workload network transmit packets dropped sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: workload-network-transmit-packets-dropped-sum-rate-details + labels: + app: metric-expression + component: workload + details: "true" + graph: network-packet + level: project + metric: network-transmit-packets-dropped-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(container_network_transmit_packets_dropped_total{namespace=~"$namespace",pod_name=~"$podName", + container_name!=""}[5m])) by (pod_name) + legendFormat: Transmit dropped([[pod_name]]) + description: workload network transmit packets dropped sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: workload-cpu-user-seconds-sum-rate + labels: + app: metric-expression + component: workload + details: "false" + graph: container-cpu-usage-details + level: project + metric: cpu-user-seconds-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(container_cpu_user_seconds_total{namespace=~"$namespace",pod_name=~"$podName", + container_name!=""}[5m])) + legendFormat: CPU user seconds + description: workload cpu user seconds sum rate +--- +kind: MonitorMetric 
+apiVersion: management.cattle.io/v3 +metadata: + name: workload-cpu-user-seconds-sum-rate-details + labels: + app: metric-expression + component: workload + details: "true" + graph: container-cpu-usage-details + level: project + metric: cpu-user-seconds-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(container_cpu_user_seconds_total{namespace=~"$namespace",pod_name=~"$podName", + container_name!=""}[5m])) by (pod_name) + legendFormat: CPU user seconds([[pod_name]]) + description: workload cpu user seconds sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: workload-cpu-cfs-throttled-seconds-sum-rate + labels: + app: metric-expression + component: workload + details: "false" + level: project + metric: cpu-cfs-throttled-seconds-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(container_cpu_cfs_throttled_seconds_total{namespace=~"$namespace",pod_name=~"$podName", + container_name!=""}[5m])) + legendFormat: CPU cfs throttled + description: workload cpu cfs throttled seconds sum rate +--- +kind: MonitorMetric +apiVersion: management.cattle.io/v3 +metadata: + name: workload-cpu-cfs-throttled-seconds-sum-rate-details + labels: + app: metric-expression + component: workload + details: "true" + level: project + metric: cpu-cfs-throttled-seconds-sum-rate + source: rancher-monitoring +spec: + expression: sum(rate(container_cpu_cfs_throttled_seconds_total{namespace=~"$namespace",pod_name=~"$podName", + container_name!=""}[5m])) by (pod_name) + legendFormat: CPU cfs throttled([[pod_name]]) + description: workload cpu cfs throttled seconds sum rate +--- diff --git a/charts/rancher-monitoring/v0.0.1/charts/metric-expression-cluster/templates/graphapiserver.yaml b/charts/rancher-monitoring/v0.0.1/charts/metric-expression-cluster/templates/graphapiserver.yaml new file mode 100644 index 0000000000000000000000000000000000000000..46e486c5dba3199c10f96d7c212e347267db3bd8 --- /dev/null +++ 
b/charts/rancher-monitoring/v0.0.1/charts/metric-expression-cluster/templates/graphapiserver.yaml @@ -0,0 +1,51 @@ +apiVersion: management.cattle.io/v3 +kind: ClusterMonitorGraph +metadata: + labels: + app: metric-expression + source: rancher-monitoring + level: cluster + component: apiserver + cluster-graph: kube-component + name: apiserver-request-latency +spec: + resourceType: apiserver + displayResourceType: kube-component + priority: 300 + title: apiserver-request-latency + detailsMetricsSelector: + component: apiserver + details: "true" + metric: request-latency-milliseconds-avg + metricsSelector: + details: "false" + component: apiserver + metric: request-latency-milliseconds-avg + yAxis: + unit: ms +--- +apiVersion: management.cattle.io/v3 +kind: ClusterMonitorGraph +metadata: + labels: + app: metric-expression + source: rancher-monitoring + level: cluster + component: apiserver + cluster-graph: kube-component + name: apiserver-request-count +spec: + resourceType: apiserver + displayResourceType: kube-component + priority: 301 + title: apiserver-request-count + detailsMetricsSelector: + component: apiserver + details: "true" + metric: request-count-sum-rate + metricsSelector: + details: "false" + component: apiserver + metric: request-count-sum-rate + yAxis: + unit: number \ No newline at end of file diff --git a/charts/rancher-monitoring/v0.0.1/charts/metric-expression-cluster/templates/graphcluster.yaml b/charts/rancher-monitoring/v0.0.1/charts/metric-expression-cluster/templates/graphcluster.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cad3c1ac340b20695013037c9669d7bb8908d619 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/metric-expression-cluster/templates/graphcluster.yaml @@ -0,0 +1,171 @@ +apiVersion: management.cattle.io/v3 +kind: ClusterMonitorGraph +metadata: + labels: + app: metric-expression + source: rancher-monitoring + level: cluster + component: cluster + name: cluster-cpu-usage +spec: + 
resourceType: cluster + priority: 100 + title: cluster-cpu-usage + metricsSelector: + details: "false" + component: cluster + metric: cpu-usage-seconds-sum-rate + detailsMetricsSelector: + details: "true" + component: cluster + metric: cpu-usage-seconds-sum-rate + yAxis: + unit: percent +--- +apiVersion: management.cattle.io/v3 +kind: ClusterMonitorGraph +metadata: + labels: + app: metric-expression + source: rancher-monitoring + level: cluster + component: cluster + name: cluster-cpu-load +spec: + resourceType: cluster + priority: 101 + title: cluster-cpu-load + metricsSelector: + details: "false" + component: cluster + graph: cpu-load + detailsMetricsSelector: + details: "true" + component: cluster + graph: cpu-load + yAxis: + unit: number +--- +apiVersion: management.cattle.io/v3 +kind: ClusterMonitorGraph +metadata: + labels: + app: metric-expression + source: rancher-monitoring + level: cluster + component: cluster + name: cluster-memory-usage +spec: + resourceType: cluster + priority: 102 + title: cluster-memory-usage + metricsSelector: + details: "false" + component: cluster + metric: memory-usage-percent + detailsMetricsSelector: + details: "true" + component: cluster + metric: memory-usage-percent + yAxis: + unit: percent +--- +apiVersion: management.cattle.io/v3 +kind: ClusterMonitorGraph +metadata: + labels: + app: metric-expression + source: rancher-monitoring + level: cluster + component: cluster + name: cluster-fs-usage-percent +spec: + resourceType: cluster + priority: 103 + title: cluster-fs-usage-percent + thresholds: 10 + metricsSelector: + details: "false" + component: cluster + metric: fs-usage-percent + detailsMetricsSelector: + details: "true" + component: cluster + metric: fs-usage-percent + yAxis: + unit: percent +--- +apiVersion: management.cattle.io/v3 +kind: ClusterMonitorGraph +metadata: + labels: + app: metric-expression + source: rancher-monitoring + level: cluster + component: cluster + name: cluster-disk-io +spec: + resourceType: 
cluster + priority: 104 + title: cluster-disk-io + thresholds: 10 + metricsSelector: + details: "false" + component: cluster + graph: disk-io + detailsMetricsSelector: + details: "true" + component: cluster + graph: disk-io + yAxis: + unit: kbps +--- +apiVersion: management.cattle.io/v3 +kind: ClusterMonitorGraph +metadata: + labels: + app: metric-expression + source: rancher-monitoring + level: cluster + component: cluster + name: cluster-network-io +spec: + resourceType: cluster + priority: 105 + title: cluster-network-io + thresholds: 10 + metricsSelector: + details: "false" + component: cluster + graph: network-io + detailsMetricsSelector: + details: "true" + component: cluster + graph: network-io + yAxis: + unit: kbps +--- +apiVersion: management.cattle.io/v3 +kind: ClusterMonitorGraph +metadata: + labels: + app: metric-expression + source: rancher-monitoring + level: cluster + component: cluster + name: cluster-network-packet +spec: + resourceType: cluster + priority: 106 + title: cluster-network-packet + thresholds: 10 + metricsSelector: + details: "false" + component: cluster + graph: network-packet + detailsMetricsSelector: + details: "true" + component: cluster + graph: network-packet + yAxis: + unit: pps \ No newline at end of file diff --git a/charts/rancher-monitoring/v0.0.1/charts/metric-expression-cluster/templates/graphcontrollermanager.yaml b/charts/rancher-monitoring/v0.0.1/charts/metric-expression-cluster/templates/graphcontrollermanager.yaml new file mode 100644 index 0000000000000000000000000000000000000000..db0fe9cd892165afab734559c50415e0b11efa6c --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/metric-expression-cluster/templates/graphcontrollermanager.yaml @@ -0,0 +1,24 @@ +--- +apiVersion: management.cattle.io/v3 +kind: ClusterMonitorGraph +metadata: + labels: + app: metric-expression + source: rancher-monitoring + level: cluster + component: controllermanager + cluster-graph: kube-component + name: controllermanager-queue-depth 
+spec: + resourceType: controllermanager + displayResourceType: kube-component + priority: 310 + title: controllermanager-queue-depth + metricsSelector: + details: "false" + component: controllermanager + detailsMetricsSelector: + details: "true" + component: controllermanager + yAxis: + unit: number diff --git a/charts/rancher-monitoring/v0.0.1/charts/metric-expression-cluster/templates/graphetcd.yaml b/charts/rancher-monitoring/v0.0.1/charts/metric-expression-cluster/templates/graphetcd.yaml new file mode 100644 index 0000000000000000000000000000000000000000..146a82e9e993cf43f717b273e88d395ac3b41d77 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/metric-expression-cluster/templates/graphetcd.yaml @@ -0,0 +1,279 @@ +--- +apiVersion: management.cattle.io/v3 +kind: ClusterMonitorGraph +metadata: + labels: + app: metric-expression + source: rancher-monitoring + level: cluster + component: etcd + name: etcd-server-leader-sum +spec: + resourceType: etcd + priority: 200 + title: etcd-server-leader-sum + description: etcd server leader sum + metricsSelector: + details: "false" + component: etcd + metric: server-leader-sum + detailsMetricsSelector: + details: "true" + component: etcd + metric: server-leader-sum + yAxis: + unit: number + graphType: singlestat +--- +apiVersion: management.cattle.io/v3 +kind: ClusterMonitorGraph +metadata: + labels: + app: metric-expression + source: rancher-monitoring + level: cluster + component: etcd + name: etcd-server-failed-proposal +spec: + resourceType: etcd + priority: 201 + title: etcd-server-failed-proposal + description: etcd server failed proposal + metricsSelector: + details: "false" + component: etcd + metric: server-failed-proposal + detailsMetricsSelector: + details: "true" + component: etcd + metric: server-failed-proposal + yAxis: + unit: number + graphType: singlestat +--- +apiVersion: management.cattle.io/v3 +kind: ClusterMonitorGraph +metadata: + labels: + app: metric-expression + source: rancher-monitoring 
+ level: cluster + component: etcd + name: etcd-leader-change +spec: + resourceType: etcd + priority: 202 + title: etcd-leader-change + description: etcd leader change + metricsSelector: + details: "false" + component: etcd + metric: server-leader-changes-seen-sum-increase + detailsMetricsSelector: + details: "true" + component: etcd + metric: server-leader-changes-seen-sum-increase + yAxis: + unit: number + graphType: singlestat +--- +apiVersion: management.cattle.io/v3 +kind: ClusterMonitorGraph +metadata: + labels: + app: metric-expression + source: rancher-monitoring + level: cluster + component: etcd + name: etcd-grpc-client +spec: + resourceType: etcd + priority: 203 + title: etcd-grpc-client + description: etcd grpc client receive/send bytes sum rate + metricsSelector: + details: "false" + component: etcd + graph: rpc-client-traffic + detailsMetricsSelector: + details: "true" + component: etcd + graph: rpc-client-traffic + yAxis: + unit: kbps +--- +apiVersion: management.cattle.io/v3 +kind: ClusterMonitorGraph +metadata: + labels: + app: metric-expression + source: rancher-monitoring + level: cluster + component: etcd + metric: db-bytes-sum + name: etcd-db-bytes-sum +spec: + resourceType: etcd + priority: 204 + title: etcd-db-bytes-sum + description: etcd db bytes sum + metricsSelector: + details: "false" + component: etcd + metric: db-bytes-sum + detailsMetricsSelector: + details: "true" + component: etcd + metric: db-bytes-sum + yAxis: + unit: byte +--- +apiVersion: management.cattle.io/v3 +kind: ClusterMonitorGraph +metadata: + labels: + app: metric-expression + source: rancher-monitoring + level: cluster + component: etcd + name: etcd-stream +spec: + resourceType: etcd + priority: 205 + title: etcd-stream + description: Etcd lease/watch stream + metricsSelector: + details: "false" + component: etcd + graph: etcd-stream + detailsMetricsSelector: + details: "true" + component: etcd + graph: etcd-stream + yAxis: + unit: number +--- +apiVersion: 
management.cattle.io/v3 +kind: ClusterMonitorGraph +metadata: + labels: + app: metric-expression + source: rancher-monitoring + level: cluster + component: etcd + name: etcd-peer-traffic +spec: + resourceType: etcd + priority: 206 + title: etcd-peer-traffic + description: Etcd peer traffic in/out + metricsSelector: + details: "false" + component: etcd + graph: etcd-peer-traffic + detailsMetricsSelector: + details: "true" + component: etcd + graph: etcd-peer-traffic + yAxis: + unit: kbps +--- +apiVersion: management.cattle.io/v3 +kind: ClusterMonitorGraph +metadata: + labels: + app: metric-expression + source: rancher-monitoring + level: cluster + component: etcd + name: etcd-raft-proposals +spec: + resourceType: etcd + priority: 207 + title: etcd-raft-proposals + description: Etcd raft proposals + metricsSelector: + details: "false" + component: etcd + graph: proposal + detailsMetricsSelector: + details: "true" + component: etcd + graph: proposal + yAxis: + unit: number +--- +apiVersion: management.cattle.io/v3 +kind: ClusterMonitorGraph +metadata: + labels: + app: metric-expression + source: rancher-monitoring + level: cluster + component: etcd + name: etcd-rpc-rate +spec: + resourceType: etcd + priority: 208 + title: etcd-rpc-rate + description: Etcd rpc-rate + metricsSelector: + details: "false" + component: etcd + graph: rpc-rate + detailsMetricsSelector: + details: "true" + component: etcd + graph: rpc-rate + yAxis: + unit: ops +--- +apiVersion: management.cattle.io/v3 +kind: ClusterMonitorGraph +metadata: + labels: + app: metric-expression + source: rancher-monitoring + level: cluster + component: etcd + name: etcd-disk-operate +spec: + resourceType: etcd + priority: 209 + title: etcd-disk-operate + description: Etcd disk operate + metricsSelector: + details: "false" + component: etcd + graph: disk-operate + detailsMetricsSelector: + details: "true" + component: etcd + graph: disk-operate + yAxis: + unit: seconds +--- +apiVersion: management.cattle.io/v3 
+kind: ClusterMonitorGraph +metadata: + labels: + app: metric-expression + source: rancher-monitoring + level: cluster + component: etcd + name: etcd-sync-duration +spec: + resourceType: etcd + priority: 209 + title: etcd-sync-duration + description: Etcd sync-duration + metricsSelector: + details: "false" + component: etcd + graph: sync-duration + detailsMetricsSelector: + details: "true" + component: etcd + graph: sync-duration + yAxis: + unit: seconds \ No newline at end of file diff --git a/charts/rancher-monitoring/v0.0.1/charts/metric-expression-cluster/templates/graphfluentd.yaml b/charts/rancher-monitoring/v0.0.1/charts/metric-expression-cluster/templates/graphfluentd.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9c4d973e7aae2c6354789b0d0acee1a0fbd64957 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/metric-expression-cluster/templates/graphfluentd.yaml @@ -0,0 +1,103 @@ +apiVersion: management.cattle.io/v3 +kind: ClusterMonitorGraph +metadata: + labels: + app: metric-expression + source: rancher-monitoring + level: cluster + component: fluentd + cluster-graph: rancher-component + name: fluentd-input-record-number +spec: + resourceType: fluentd + displayResourceType: rancher-component + priority: 300 + title: fluentd-input-record-number + metricsSelector: + details: "false" + component: fluentd + metric: input-record + detailsMetricsSelector: + details: "true" + component: fluentd + metric: input-record + yAxis: + unit: number +--- +apiVersion: management.cattle.io/v3 +kind: ClusterMonitorGraph +metadata: + labels: + app: metric-expression + source: rancher-monitoring + level: cluster + component: fluentd + cluster-graph: rancher-component + name: fluentd-output-record-number +spec: + resourceType: fluentd + displayResourceType: rancher-component + priority: 301 + title: fluentd-output-record-number + metricsSelector: + details: "false" + component: fluentd + metric: output-record + detailsMetricsSelector: + details: 
"true" + component: fluentd + metric: output-record + yAxis: + unit: number +--- +apiVersion: management.cattle.io/v3 +kind: ClusterMonitorGraph +metadata: + labels: + app: metric-expression + source: rancher-monitoring + level: cluster + component: fluentd + cluster-graph: rancher-component + name: fluentd-output-errors +spec: + resourceType: fluentd + displayResourceType: rancher-component + priority: 301 + title: fluentd-output-errors + metricsSelector: + details: "false" + component: fluentd + metric: output-errors + detailsMetricsSelector: + details: "true" + component: fluentd + metric: output-errors + yAxis: + unit: number +--- +apiVersion: management.cattle.io/v3 +kind: ClusterMonitorGraph +metadata: + labels: + app: metric-expression + source: rancher-monitoring + level: cluster + component: fluentd + cluster-graph: rancher-component + name: fluentd-buffer-queue-length +spec: + resourceType: fluentd + displayResourceType: rancher-component + priority: 301 + title: fluentd-buffer-queue-length + metricsSelector: + details: "false" + component: fluentd + metric: buffer-queue-length + detailsMetricsSelector: + details: "true" + component: fluentd + metric: buffer-queue-length + yAxis: + unit: number \ No newline at end of file diff --git a/charts/rancher-monitoring/v0.0.1/charts/metric-expression-cluster/templates/graphingresscontroller.yaml b/charts/rancher-monitoring/v0.0.1/charts/metric-expression-cluster/templates/graphingresscontroller.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ab3b37817419909230d07789409ab9080fe7c6a7 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/metric-expression-cluster/templates/graphingresscontroller.yaml @@ -0,0 +1,78 @@ +apiVersion: management.cattle.io/v3 +kind: ClusterMonitorGraph +metadata: + labels: + app: metric-expression + source: rancher-monitoring + level: cluster + component: ingresscontroller + cluster-graph: kube-component + name: ingresscontroller-nginx-connection +spec: + 
resourceType: ingresscontroller + displayResourceType: kube-component + priority: 330 + title: ingresscontroller-nginx-connection + metricsSelector: + details: "false" + component: ingresscontroller + graph: nginx-connection + detailsMetricsSelector: + details: "true" + component: ingresscontroller + graph: nginx-connection + yAxis: + unit: number +--- +apiVersion: management.cattle.io/v3 +kind: ClusterMonitorGraph +metadata: + labels: + app: metric-expression + source: rancher-monitoring + level: cluster + component: ingresscontroller + cluster-graph: kube-component + name: ingresscontroller-request-process-time +spec: + resourceType: ingresscontroller + displayResourceType: kube-component + priority: 331 + title: ingresscontroller-request-process-time + metricsSelector: + details: "false" + component: ingresscontroller + metric: request-process-seconds + detailsMetricsSelector: + details: "true" + component: ingresscontroller + metric: request-process-seconds + yAxis: + unit: seconds +--- +apiVersion: management.cattle.io/v3 +kind: ClusterMonitorGraph +metadata: + labels: + app: metric-expression + source: rancher-monitoring + level: cluster + component: ingresscontroller + cluster-graph: kube-component + name: ingresscontroller-upstream-response-seconds +spec: + resourceType: ingresscontroller + displayResourceType: kube-component + priority: 332 + title: ingresscontroller-upstream-response-seconds + metricsSelector: + details: "false" + component: ingresscontroller + metric: upstream-response-seconds + detailsMetricsSelector: + details: "true" + component: ingresscontroller + metric: upstream-response-seconds + yAxis: + unit: seconds + graphType: singlestat \ No newline at end of file diff --git a/charts/rancher-monitoring/v0.0.1/charts/metric-expression-cluster/templates/graphnode.yaml b/charts/rancher-monitoring/v0.0.1/charts/metric-expression-cluster/templates/graphnode.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..14d028083a127e222ab8acacb08adee995cce6a6 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/metric-expression-cluster/templates/graphnode.yaml @@ -0,0 +1,171 @@ +apiVersion: management.cattle.io/v3 +kind: ClusterMonitorGraph +metadata: + labels: + app: metric-expression + source: rancher-monitoring + level: cluster + component: node + name: node-cpu-usage +spec: + resourceType: node + priority: 500 + title: node-cpu-usage + metricsSelector: + details: "false" + component: node + metric: cpu-usage-seconds-sum-rate + detailsMetricsSelector: + details: "true" + component: node + metric: cpu-usage-seconds-sum-rate + yAxis: + unit: percent +--- +apiVersion: management.cattle.io/v3 +kind: ClusterMonitorGraph +metadata: + labels: + app: metric-expression + source: rancher-monitoring + level: cluster + component: node + name: node-cpu-load +spec: + resourceType: node + priority: 501 + title: node-cpu-load + metricsSelector: + details: "false" + component: node + graph: cpu-load + detailsMetricsSelector: + details: "true" + component: node + graph: cpu-load + yAxis: + unit: number +--- +apiVersion: management.cattle.io/v3 +kind: ClusterMonitorGraph +metadata: + labels: + app: metric-expression + source: rancher-monitoring + level: cluster + component: node + name: node-memory-usage +spec: + resourceType: node + priority: 502 + title: node-memory-usage + metricsSelector: + details: "false" + component: node + metric: memory-usage-percent + detailsMetricsSelector: + details: "true" + component: node + metric: memory-usage-percent + yAxis: + unit: percent +--- +apiVersion: management.cattle.io/v3 +kind: ClusterMonitorGraph +metadata: + labels: + app: metric-expression + source: rancher-monitoring + level: cluster + component: node + name: node-fs-usage-percent +spec: + resourceType: node + priority: 503 + title: node-fs-usage-percent + thresholds: 10 + metricsSelector: + details: "false" + component: node + metric: 
fs-usage-percent + detailsMetricsSelector: + details: "true" + component: node + metric: fs-usage-percent + yAxis: + unit: percent +--- +apiVersion: management.cattle.io/v3 +kind: ClusterMonitorGraph +metadata: + labels: + app: metric-expression + source: rancher-monitoring + level: cluster + component: node + name: node-disk-io +spec: + resourceType: node + priority: 504 + title: node-disk-io + thresholds: 10 + metricsSelector: + details: "false" + component: node + graph: disk-io + detailsMetricsSelector: + details: "true" + component: node + graph: disk-io + yAxis: + unit: kbps +--- +apiVersion: management.cattle.io/v3 +kind: ClusterMonitorGraph +metadata: + labels: + app: metric-expression + source: rancher-monitoring + level: cluster + component: node + name: node-network-io +spec: + resourceType: node + priority: 505 + title: node-network-io + thresholds: 10 + metricsSelector: + details: "false" + component: node + graph: network-io + detailsMetricsSelector: + details: "true" + component: node + graph: network-io + yAxis: + unit: kbps +--- +apiVersion: management.cattle.io/v3 +kind: ClusterMonitorGraph +metadata: + labels: + app: metric-expression + source: rancher-monitoring + level: cluster + component: node + name: node-network-packet +spec: + resourceType: node + priority: 506 + title: node-network-packet + thresholds: 10 + metricsSelector: + details: "false" + component: node + graph: network-packet + detailsMetricsSelector: + details: "true" + component: node + graph: network-packet + yAxis: + unit: pps \ No newline at end of file diff --git a/charts/rancher-monitoring/v0.0.1/charts/metric-expression-cluster/templates/graphscheduler.yaml b/charts/rancher-monitoring/v0.0.1/charts/metric-expression-cluster/templates/graphscheduler.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b54a131ef9fd75950846a4ea62c40a7466fdf4ba --- /dev/null +++ 
b/charts/rancher-monitoring/v0.0.1/charts/metric-expression-cluster/templates/graphscheduler.yaml @@ -0,0 +1,80 @@ +apiVersion: management.cattle.io/v3 +kind: ClusterMonitorGraph +metadata: + labels: + app: metric-expression + source: rancher-monitoring + level: cluster + component: scheduler + cluster-graph: kube-component + name: scheduler-e-2-e-scheduling-latency-seconds-quantile +spec: + resourceType: scheduler + displayResourceType: kube-component + priority: 320 + title: scheduler-e-2-e-scheduling-latency-seconds-quantile + thresholds: 10 + metricsSelector: + details: "false" + component: scheduler + metric: e-2-e-scheduling-latency-seconds-quantile + detailsMetricsSelector: + details: "true" + component: scheduler + metric: e-2-e-scheduling-latency-seconds-quantile + yAxis: + unit: seconds +--- +apiVersion: management.cattle.io/v3 +kind: ClusterMonitorGraph +metadata: + labels: + app: metric-expression + source: rancher-monitoring + level: cluster + component: scheduler + cluster-graph: kube-component + name: scheduler-total-preemption-attempts +spec: + resourceType: scheduler + displayResourceType: kube-component + priority: 321 + title: scheduler-total-preemption-attempts + thresholds: 10 + metricsSelector: + details: "false" + component: scheduler + metric: total-preemption-attempts + detailsMetricsSelector: + details: "true" + component: scheduler + metric: total-preemption-attempts + yAxis: + unit: number +--- +apiVersion: management.cattle.io/v3 +kind: ClusterMonitorGraph +metadata: + labels: + app: metric-expression + source: rancher-monitoring + level: cluster + component: scheduler + cluster-graph: kube-component + name: scheduler-pod-unscheduler +spec: + resourceType: scheduler + displayResourceType: kube-component + priority: 322 + title: scheduler-pod-unscheduler + thresholds: 10 + metricsSelector: + details: "false" + component: scheduler + metric: pod-unscheduler + detailsMetricsSelector: + details: "true" + component: scheduler + metric: 
pod-unscheduler + yAxis: + unit: number \ No newline at end of file diff --git a/charts/rancher-monitoring/v0.0.1/charts/metric-expression-cluster/values.yaml b/charts/rancher-monitoring/v0.0.1/charts/metric-expression-cluster/values.yaml new file mode 100755 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/charts/rancher-monitoring/v0.0.1/charts/metric-expression-project/Chart.yaml b/charts/rancher-monitoring/v0.0.1/charts/metric-expression-project/Chart.yaml new file mode 100755 index 0000000000000000000000000000000000000000..131042b09651d0603b54090a195d99fd471d8923 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/metric-expression-project/Chart.yaml @@ -0,0 +1,9 @@ +apiVersion: v1 +description: Creates Metrics CRD of Rancher monitoring graph +engine: gotpl +maintainers: +- name: aiwantaozi + email: michelia.feng@gmail.com +name: metric-expression-project +version: 0.0.1 + diff --git a/charts/rancher-monitoring/v0.0.1/charts/metric-expression-project/templates/graphcontainer.yaml b/charts/rancher-monitoring/v0.0.1/charts/metric-expression-project/templates/graphcontainer.yaml new file mode 100644 index 0000000000000000000000000000000000000000..126cc09fcde657bd012fc10f13018b2dd0cef0b8 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/metric-expression-project/templates/graphcontainer.yaml @@ -0,0 +1,125 @@ +apiVersion: management.cattle.io/v3 +kind: ProjectMonitorGraph +metadata: + labels: + app: metric-expression + source: rancher-monitoring + level: project + component: container + name: container-cpu-usage +spec: + projectName: {{ .ProjectName }} + resourceType: container + priority: 800 + title: container-cpu-usage + metricsSelector: + details: "false" + component: container + graph: container-cpu-usage + detailsMetricsSelector: + details: "true" + component: container + graph: container-cpu-usage-details + yAxis: + unit: mcpu +--- +apiVersion: management.cattle.io/v3 +kind: 
ProjectMonitorGraph +metadata: + labels: + app: metric-expression + source: rancher-monitoring + level: project + component: container + name: container-memory-usage-bytes-sum +spec: + projectName: {{ .ProjectName }} + resourceType: container + priority: 801 + title: container-memory-usage-bytes-sum + metricsSelector: + details: "false" + component: container + metric: memory-usage-bytes-sum + detailsMetricsSelector: + details: "true" + component: container + metric: memory-usage-bytes-sum + yAxis: + unit: byte +--- +apiVersion: management.cattle.io/v3 +kind: ProjectMonitorGraph +metadata: + labels: + app: metric-expression + source: rancher-monitoring + level: project + component: container + name: container-network-io +spec: + projectName: {{ .ProjectName }} + resourceType: container + priority: 802 + title: container-network-io + metricsSelector: + details: "false" + component: container + graph: network-io + detailsMetricsSelector: + details: "true" + component: container + graph: network-io + yAxis: + unit: kbps +--- +apiVersion: management.cattle.io/v3 +kind: ProjectMonitorGraph +metadata: + labels: + app: metric-expression + source: rancher-monitoring + level: project + component: container + name: container-network-packet +spec: + projectName: {{ .ProjectName }} + resourceType: container + priority: 803 + title: container-network-packet + metricsSelector: + details: "false" + component: container + graph: network-packet + detailsMetricsSelector: + details: "true" + component: container + graph: network-packet + yAxis: + unit: pps +--- +apiVersion: management.cattle.io/v3 +kind: ProjectMonitorGraph +metadata: + labels: + app: metric-expression + source: rancher-monitoring + level: project + component: container + name: container-disk-io +spec: + projectName: {{ .ProjectName }} + resourceType: container + priority: 804 + title: container-disk-io + metricsSelector: + details: "false" + component: container + graph: disk-io + detailsMetricsSelector: + details: 
"true" + component: container + graph: disk-io + yAxis: + unit: kbps +--- diff --git a/charts/rancher-monitoring/v0.0.1/charts/metric-expression-project/templates/graphpod.yaml b/charts/rancher-monitoring/v0.0.1/charts/metric-expression-project/templates/graphpod.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f482d27a28aec819b37b875585dc0507c9e18eb8 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/metric-expression-project/templates/graphpod.yaml @@ -0,0 +1,125 @@ +apiVersion: management.cattle.io/v3 +kind: ProjectMonitorGraph +metadata: + labels: + app: metric-expression + source: rancher-monitoring + level: project + component: pod + name: pod-cpu-usage +spec: + projectName: {{ .ProjectName }} + resourceType: pod + priority: 700 + title: pod-cpu-usage + metricsSelector: + details: "false" + component: pod + graph: container-cpu-usage + detailsMetailsMetricsSelector: + details: "true" + component: pod + graph: container-cpu-usage-details + yAxis: + unit: mcpu +--- +apiVersion: management.cattle.io/v3 +kind: ProjectMonitorGraph +metadata: + labels: + app: metric-expression + source: rancher-monitoring + level: project + component: pod + name: pod-memory-usage-bytes-sum +spec: + projectName: {{ .ProjectName }} + resourceType: pod + priority: 701 + title: pod-memory-usage-bytes-sum + metricsSelector: + details: "false" + component: pod + metric: memory-usage-bytes-sum + detailsMetricsSelector: + details: "true" + component: pod + metric: memory-usage-bytes-sum + yAxis: + unit: byte +--- +apiVersion: management.cattle.io/v3 +kind: ProjectMonitorGraph +metadata: + labels: + app: metric-expression + source: rancher-monitoring + level: project + component: pod + name: pod-network-io +spec: + projectName: {{ .ProjectName }} + resourceType: pod + priority: 702 + title: pod-network-io + metricsSelector: + details: "false" + component: pod + graph: network-io + detailsMetricsSelector: + details: "true" + component: pod + graph: 
network-io + yAxis: + unit: kbps +--- +apiVersion: management.cattle.io/v3 +kind: ProjectMonitorGraph +metadata: + labels: + app: metric-expression + source: rancher-monitoring + level: project + component: pod + name: pod-network-packet +spec: + projectName: {{ .ProjectName }} + resourceType: pod + priority: 703 + title: pod-network-packet + metricsSelector: + details: "false" + component: pod + graph: network-packet + detailsMetricsSelector: + details: "true" + component: pod + graph: network-packet + yAxis: + unit: pps +--- +apiVersion: management.cattle.io/v3 +kind: ProjectMonitorGraph +metadata: + labels: + app: metric-expression + source: rancher-monitoring + level: project + component: pod + name: pod-disk-io +spec: + projectName: {{ .ProjectName }} + resourceType: pod + priority: 704 + title: pod-disk-io + metricsSelector: + details: "false" + component: pod + graph: disk-io + detailsMetricsSelector: + details: "true" + component: pod + graph: disk-io + yAxis: + unit: kbps +--- diff --git a/charts/rancher-monitoring/v0.0.1/charts/metric-expression-project/templates/graphworkload.yaml b/charts/rancher-monitoring/v0.0.1/charts/metric-expression-project/templates/graphworkload.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7197ab3f5954b92bbb85e70e6df9685d4988ccc4 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/metric-expression-project/templates/graphworkload.yaml @@ -0,0 +1,125 @@ +apiVersion: management.cattle.io/v3 +kind: ProjectMonitorGraph +metadata: + labels: + app: metric-expression + source: rancher-monitoring + level: project + component: workload + name: workload-cpu-usage +spec: + projectName: {{ .ProjectName }} + resourceType: workload + priority: 600 + title: workload-cpu-usage + metricsSelector: + details: "false" + component: workload + graph: container-cpu-usage + detailsMetricsSelector: + details: "true" + component: pod + graph: container-cpu-usage-details + yAxis: + unit: mcpu +--- +apiVersion: 
management.cattle.io/v3 +kind: ProjectMonitorGraph +metadata: + labels: + app: metric-expression + source: rancher-monitoring + level: project + component: workload + name: workload-memory-usage-bytes-sum +spec: + projectName: {{ .ProjectName }} + resourceType: workload + priority: 601 + title: workload-memory-usage-bytes-sum + metricsSelector: + details: "false" + component: workload + metric: memory-usage-bytes-sum + detailsMetricsSelector: + details: "true" + component: workload + metric: memory-usage-bytes-sum + yAxis: + unit: byte +--- +apiVersion: management.cattle.io/v3 +kind: ProjectMonitorGraph +metadata: + labels: + app: metric-expression + source: rancher-monitoring + level: project + component: workload + name: workload-network-io +spec: + projectName: {{ .ProjectName }} + resourceType: workload + priority: 602 + title: workload-network-io + metricsSelector: + details: "false" + component: workload + graph: network-io + detailsMetricsSelector: + details: "true" + component: workload + graph: network-io + yAxis: + unit: kbps +--- +apiVersion: management.cattle.io/v3 +kind: ProjectMonitorGraph +metadata: + labels: + app: metric-expression + source: rancher-monitoring + level: project + component: workload + name: workload-network-packet +spec: + projectName: {{ .ProjectName }} + resourceType: workload + priority: 603 + title: workload-network-packet + metricsSelector: + details: "false" + component: workload + graph: network-packet + detailsMetricsSelector: + details: "true" + component: workload + graph: network-packet + yAxis: + unit: pps +--- +apiVersion: management.cattle.io/v3 +kind: ProjectMonitorGraph +metadata: + labels: + app: metric-expression + source: rancher-monitoring + level: project + component: workload + name: workload-disk-io +spec: + projectName: {{ .ProjectName }} + resourceType: workload + priority: 604 + title: workload-disk-io + metricsSelector: + details: "false" + component: workload + graph: disk-io + detailsMetricsSelector: + 
details: "true" + component: workload + graph: disk-io + yAxis: + unit: kbps +--- diff --git a/charts/rancher-monitoring/v0.0.1/charts/metric-expression-project/values.yaml b/charts/rancher-monitoring/v0.0.1/charts/metric-expression-project/values.yaml new file mode 100755 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/charts/rancher-monitoring/v0.0.1/charts/prometheus/Chart.yaml b/charts/rancher-monitoring/v0.0.1/charts/prometheus/Chart.yaml new file mode 100755 index 0000000000000000000000000000000000000000..bcac1efa071b7099cafef54d2f9e959ab48be92e --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/prometheus/Chart.yaml @@ -0,0 +1,8 @@ +apiVersion: v1 +description: Creates Prometheus CRD instance for Kubernetes which maintaining by Rancher 2. +engine: gotpl +maintainers: +- name: thxCode + email: frank@rancher.com +name: prometheus +version: 0.0.1 diff --git a/charts/rancher-monitoring/v0.0.1/charts/prometheus/templates/hooks-configmap.yaml b/charts/rancher-monitoring/v0.0.1/charts/prometheus/templates/hooks-configmap.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b62da2402ebb979c10aae44c4733088a3ce72663 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/prometheus/templates/hooks-configmap.yaml @@ -0,0 +1,28 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + labels: + app: {{ template "app.name" . }} + chart: {{ template "app.version" . }} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + name: {{ template "app.hooks.fullname" . 
}} +data: + replace-config-by-auth.sh: |- + #!/bin/sh + + srcpath="/template/nginx.conf" + dstpath="/host/nginx.conf" + + if [ -f $srcpath ] && [ -d /host ]; then + token=$(cat /var/run/secrets/kubernetes.io/serviceaccount/token) + + sed "s/REPLACE_PARAM_AUTHORIZATION/Bearer ${token}/g" $srcpath > $dstpath + cat $dstpath + + exit 0 + fi + + exit 1 + + diff --git a/charts/rancher-monitoring/v0.0.1/charts/prometheus/templates/metrics-service.yaml b/charts/rancher-monitoring/v0.0.1/charts/prometheus/templates/metrics-service.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c14f5da657ea976eb9d70cc8a232d5365c5b0168 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/prometheus/templates/metrics-service.yaml @@ -0,0 +1,24 @@ +apiVersion: v1 +kind: Service +metadata: + name: expose-prometheus-metrics + labels: + app: {{ template "app.name" . }} + chart: {{ template "app.version" . }} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + monitoring.cattle.io: "true" +spec: + type: ClusterIP + selector: +{{- if .Values.labels }} +{{ toYaml .Values.labels | indent 4 }} +{{- else }} + app: {{ template "app.name" . }} + chart: {{ template "app.version" . }} + release: {{ .Release.Name }} +{{- end }} + ports: + - name: http + port: 9090 + targetPort: web \ No newline at end of file diff --git a/charts/rancher-monitoring/v0.0.1/charts/prometheus/templates/nginx-comfigmap.yaml b/charts/rancher-monitoring/v0.0.1/charts/prometheus/templates/nginx-comfigmap.yaml new file mode 100644 index 0000000000000000000000000000000000000000..24a86b2103c2759f8cae6839b06e37011a5d20ea --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/prometheus/templates/nginx-comfigmap.yaml @@ -0,0 +1,56 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ template "app.nginx.fullname" . }} + labels: + app: {{ template "app.name" . }} + chart: {{ template "app.version" . 
}} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + component: nginx +data: + nginx.conf: |- + user nginx; + worker_processes auto; + error_log /dev/null warn; + pid /var/run/nginx.pid; + + events { + worker_connections 1024; + } + + http { + include /etc/nginx/mime.types; + + log_format main '[$time_local - $status] $remote_addr - $remote_user $request ($http_referer)'; + + server { + listen 80; + + access_log off; + + gzip on; + gzip_min_length 1k; + gzip_comp_level 2; + gzip_types text/plain application/javascript application/x-javascript text/css application/xml text/javascript image/jpeg image/gif image/png; + gzip_vary on; + gzip_disable "MSIE [1-6]\."; + + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_set_header Authorization "REPLACE_PARAM_AUTHORIZATION"; + proxy_pass_header Authorization; + + location / { + proxy_pass http://prometheus-operated:9090/; + + sub_filter_types text/html; + sub_filter_once off; + sub_filter 'var PATH_PREFIX = "";' 'var PATH_PREFIX = ".";'; + } + + } + + } diff --git a/charts/rancher-monitoring/v0.0.1/charts/prometheus/templates/nginx-deployment.yaml b/charts/rancher-monitoring/v0.0.1/charts/prometheus/templates/nginx-deployment.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2b80ffd8787b047ec6654598108e0e68a9feab60 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/prometheus/templates/nginx-deployment.yaml @@ -0,0 +1,73 @@ +apiVersion: {{ template "deployment_api_version" . }} +kind: Deployment +metadata: + name: {{ template "app.nginx.fullname" . }} + labels: + app: {{ template "app.name" . }} + chart: {{ template "app.version" . }} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + component: nginx +spec: + replicas: 1 + selector: + matchLabels: + app: {{ template "app.name" . }} + chart: {{ template "app.version" . 
}} + release: {{ .Release.Name }} + component: nginx + template: + metadata: + labels: + app: {{ template "app.name" . }} + chart: {{ template "app.version" . }} + release: {{ .Release.Name }} + component: nginx + spec: + {{- if .Values.enabledRBAC }} + serviceAccountName: {{ .Values.serviceAccountName }} + {{- end }} + initContainers: + - name: nginx-init-auth-add + image: {{ .Values.image.inits.tools.repository }}:{{ .Values.image.inits.tools.tag }} + command: + - /usr/bin/replace-config-by-auth.sh + volumeMounts: + - name: prometheus-static-hooks + mountPath: /usr/bin/replace-config-by-auth.sh + subPath: replace-config-by-auth.sh + - name: prometheus-static-contents + mountPath: /host + - name: prometheus-nginx-template + mountPath: /template + containers: + - name: nginx + image: nginx:1.15.2 + args: + - nginx + - -g + - daemon off; + - -c + - /nginx/nginx.conf + volumeMounts: + - mountPath: /nginx + name: prometheus-static-contents + ports: + - name: http + containerPort: 80 + protocol: TCP + volumes: + - name: prometheus-static-hooks + configMap: + name: {{ template "app.hooks.fullname" . }} + defaultMode: 0777 + - name: prometheus-static-contents + emptyDir: {} + - name: prometheus-nginx-template + configMap: + name: {{ template "app.nginx.fullname" . }} + defaultMode: 438 + items: + - key: nginx.conf + mode: 438 + path: nginx.conf diff --git a/charts/rancher-monitoring/v0.0.1/charts/prometheus/templates/prometheus.yaml b/charts/rancher-monitoring/v0.0.1/charts/prometheus/templates/prometheus.yaml new file mode 100755 index 0000000000000000000000000000000000000000..5be5c160f17ee6c4204c0b8e22fb8005377993dd --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/prometheus/templates/prometheus.yaml @@ -0,0 +1,144 @@ +apiVersion: {{ template "operator_api_version" . }} +kind: Prometheus +metadata: + labels: + app: {{ template "app.name" . }} + chart: {{ template "app.version" . 
}} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} +{{- if .Values.labels }} +{{ toYaml .Values.labels | indent 4 }} +{{- end }} + name: {{ .Release.Name }} +spec: +{{- if .Values.listenLocal }} + listenLocal: true +{{- end }} + podMetadata: + labels: +{{- if .Values.labels }} +{{ toYaml .Values.labels | indent 6 }} +{{- else }} + app: {{ template "app.name" . }} + chart: {{ template "app.version" . }} + release: {{ .Release.Name }} +{{- end }} +{{- if .Values.alertingEndpoints }} + alerting: + alertmanagers: +{{ toYaml .Values.alertingEndpoints | indent 6 }} +{{- else }} + alerting: + alertmanagers: + - namespace: {{ .Release.Namespace }} + name: alertmanager-operated + port: http +{{- end }} + baseImage: "{{ .Values.image.repository }}" +{{- if .Values.externalLabels }} + externalLabels: +{{ toYaml .Values.externalLabels | indent 4}} +{{- end }} +{{- if .Values.externalUrl }} + externalUrl: "{{ .Values.externalUrl }}" +{{- end }} +{{- if .Values.nodeSelector }} + nodeSelector: +{{ toYaml .Values.nodeSelector | indent 4 }} +{{- end }} + paused: {{ .Values.paused }} + replicas: {{ .Values.replicaCount }} + logLevel: {{ .Values.logLevel }} + resources: +{{ toYaml .Values.resources | indent 4 }} + retention: "{{ .Values.retention }}" +{{- if .Values.routePrefix }} + routePrefix: "{{ .Values.routePrefix }}" +{{- end }} +{{- if .Values.secrets }} + secrets: +{{ toYaml .Values.secrets | indent 4 }} +{{- end }} +{{- if .Values.enabledRBAC }} + serviceAccountName: {{ .Values.serviceAccountName }} +{{- end }} +{{- if .Values.serviceMonitorNamespaceSelector }} + serviceMonitorNamespaceSelector: +{{ toYaml .Values.serviceMonitorNamespaceSelector | indent 4 }} +{{- end }} + serviceMonitorSelector: +{{- if .Values.serviceMonitorsSelector }} +{{ toYaml .Values.serviceMonitorsSelector | indent 4 }} +{{- else }} + matchLabels: + source: rancher-monitoring + release: {{ .Release.Name }} +{{- end }} +{{- if .Values.remoteRead }} + remoteRead: +{{ toYaml 
.Values.remoteRead | indent 4 }} +{{- end }} +{{- if .Values.remoteWrite }} + remoteWrite: +{{ toYaml .Values.remoteWrite | indent 4 }} +{{- end }} +{{- if .Values.ruleNamespaceSelector }} + ruleNamespaceSelector: +{{ toYaml .Values.ruleNamespaceSelector | indent 4 }} +{{- end }} + ruleSelector: +{{- if .Values.rulesSelector }} +{{ toYaml .Values.rulesSelector | indent 4 }} +{{- else }} + matchLabels: + source: rancher-monitoring + release: {{ .Release.Name }} +{{- end }} +{{- if or .Values.storageSpec .Values.persistence.enabled }} + storage: + volumeClaimTemplate: + spec: +{{- if .Values.storageSpec }} +{{ toYaml .Values.storageSpec | indent 8 }} +{{- else }} + {{ if and .Values.persistence.storageClass (ne "default" .Values.persistence.storageClass) }} + storageClassName: {{ .Values.persistence.storageClass }} + {{ end }} + accessModes: + - {{ default "ReadWriteOnce" .Values.persistence.accessMode }} + resources: + requests: + storage: {{ .Values.persistence.size | quote }} +{{- end }} +{{- end }} + version: "{{ .Values.image.tag }}" + affinity: + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + topologyKey: kubernetes.io/hostname + labelSelector: + matchLabels: + app: {{ template "app.name" . }} + prometheus: {{ .Release.Name }} +{{- if .Values.tolerations }} + tolerations: +{{ toYaml .Values.tolerations | indent 4 }} +{{- end }} + imagePullSecrets: +{{ toYaml .Values.image.pullSecrets | indent 4 }} +{{- if and .Values.additionalScrapeConfigsEnabled .Values.additionalScrapeConfigs }} + additionalScrapeConfigs: + name: {{ template "app.fullname" . }}-additional-scrape-configs + key: additional-scrape-configs.yaml +{{- end }} +{{- if and .Values.additionalAlertManagerConfigsEnabled .Values.additionalAlertManagerConfigs }} + additionalAlertManagerConfigs: + name: {{ template "app.fullname" . 
}}-additional-alertmanager-configs + key: additional-alertmanager-configs.yaml +{{- end }} +{{- if .Values.sidecarsSpec }} + containers: +{{ toYaml .Values.sidecarsSpec | indent 4 }} +{{- end }} diff --git a/charts/rancher-monitoring/v0.0.1/charts/prometheus/templates/secrets.yaml b/charts/rancher-monitoring/v0.0.1/charts/prometheus/templates/secrets.yaml new file mode 100755 index 0000000000000000000000000000000000000000..b906c366ee154709ed0ce908790fcce832fe4f84 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/prometheus/templates/secrets.yaml @@ -0,0 +1,28 @@ +{{- if and .Values.additionalScrapeConfigsEnabled .Values.additionalScrapeConfigs }} +apiVersion: v1 +kind: Secret +metadata: + labels: + app: {{ template "app.name" . }} + chart: {{ template "app.version" . }} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + name: {{ template "app.fullname" . }}-additional-scrape-configs +data: + additional-scrape-configs.yaml: {{ toYaml .Values.additionalScrapeConfigs | b64enc | quote }} +{{- end }} + +{{- if and .Values.additionalAlertManagerConfigsEnabled .Values.additionalAlertManagerConfigs }} +--- +apiVersion: v1 +kind: Secret +metadata: + labels: + app: {{ template "app.name" . }} + chart: {{ template "app.version" . }} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + name: {{ template "app.fullname" . 
}}-additional-alertmanager-configs +data: + additional-alertmanager-configs.yaml: {{ toYaml .Values.additionalAlertManagerConfigs | b64enc | quote }} +{{- end }} diff --git a/charts/rancher-monitoring/v0.0.1/charts/prometheus/templates/service.yaml b/charts/rancher-monitoring/v0.0.1/charts/prometheus/templates/service.yaml new file mode 100755 index 0000000000000000000000000000000000000000..d6f2262b1545c6868c239bcc0eeb4cfb37e7e7fc --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/prometheus/templates/service.yaml @@ -0,0 +1,21 @@ +apiVersion: v1 +kind: Service +metadata: + name: access-prometheus + labels: + app: {{ template "app.name" . }} + chart: {{ template "app.version" . }} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + kubernetes.io/cluster-service: "true" +spec: + type: ClusterIP + selector: + app: {{ template "app.name" . }} + chart: {{ template "app.version" . }} + release: {{ .Release.Name }} + component: nginx + ports: + - name: http + port: 80 + targetPort: http diff --git a/charts/rancher-monitoring/v0.0.1/charts/prometheus/templates/servicemonitor.yaml b/charts/rancher-monitoring/v0.0.1/charts/prometheus/templates/servicemonitor.yaml new file mode 100644 index 0000000000000000000000000000000000000000..94e179ab3e46d796def2a74cfb2aa39132b049cc --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/prometheus/templates/servicemonitor.yaml @@ -0,0 +1,27 @@ +apiVersion: {{ template "operator_api_version" . }} +kind: ServiceMonitor +metadata: + labels: + app: {{ template "app.name" . }} + chart: {{ template "app.version" . }} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + source: rancher-monitoring + {{- if .Values.serviceMonitor.labels }} +{{ toYaml .Values.serviceMonitor.labels | indent 4 }} + {{- end }} + name: {{ template "app.fullname" . }} +spec: + jobLabel: prometheus + selector: + matchLabels: + app: {{ template "app.name" . }} + chart: {{ template "app.version" . 
}} + release: {{ .Release.Name }} + monitoring.cattle.io: "true" + namespaceSelector: + matchNames: + - {{ .Release.Namespace | quote }} + endpoints: + - port: http + interval: 30s diff --git a/charts/rancher-monitoring/v0.0.1/charts/prometheus/values.yaml b/charts/rancher-monitoring/v0.0.1/charts/prometheus/values.yaml new file mode 100644 index 0000000000000000000000000000000000000000..076ec74f5fb10c2352df4419dedb020065401f5c --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/charts/prometheus/values.yaml @@ -0,0 +1,183 @@ +enabledRBAC: true + +## Already exist ServiceAccount +## +serviceAccountName: "" + +## CRD apiGroup +## +apiGroup: "monitoring.coreos.com" + +## Alertmanagers to which alerts will be sent +## Ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#alertmanagerendpoints +## +alertingEndpoints: [] +# - name: "" +# namespace: "" +# port: http +# scheme: http + +## External labels to add to any time series or alerts when communicating with external systems +## +externalLabels: {} + +## External URL at which Prometheus will be reachable +## +externalUrl: "" + +serviceMonitor: + ## Custom Labels to be added to ServiceMonitor + ## + labels: {} + +##Custom Labels to be added to Prometheus Rules CRDs +## +additionalRulesLabels: {} + +## Prometheus container image +## +image: + ## Reference to one or more secrets to be used when pulling images + ## + pullSecrets: [] + repository: quay.io/prometheus/prometheus + tag: v2.4.3 + inits: + tools: + repository: maiwj/curl + tag: 7.56.1-r0 + +## Labels to be added to the Prometheus +## +# labels: {} + +## Node labels for Prometheus pod assignment +## Ref: https://kubernetes.io/docs/user-guide/node-selection/ +## +nodeSelector: {} + +## Tolerations for use with node taints +## Ref: https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/ +## +tolerations: [] + # - key: "key" + # operator: "Equal" + # value: "value" + # effect: "NoSchedule" + + +## If true, the 
Operator won't process any Prometheus configuration changes +## +paused: false + + +## Number of Prometheus replicas desired +## +replicaCount: 1 + +## The remote_read spec configuration for Prometheus. +## Ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#remotereadspec +remoteRead: [] + # remoteRead: + # - url: http://remote1/read + +## The remote_write spec configuration for Prometheus. +## Ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#remotewritespec +remoteWrite: [] + # remoteWrite: + # - url: http://remote1/push + +## Resource limits & requests +## Ref: https://kubernetes.io/docs/user-guide/compute-resources/ +## +resources: {} + # requests: + # memory: 400Mi + +## How long to retain metrics +## +retention: 24h + +## Prefix used to register routes, overriding externalUrl route. +## Useful for proxies that rewrite URLs. +## +routePrefix: "" + +## Namespaces to be selected for PrometheusRules discovery. +## If unspecified, only the same namespace as the Prometheus object is in is used. +ruleNamespaceSelector: {} +## Rules CRD selector +## Ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/design.md +## +## 1. If `matchLabels` is used, `rules.additionalLabels` must contain all the labels from +## `matchLabels` in order to be matched by Prometheus +## 2. If `matchExpressions` is used `rules.additionalLabels` must contain at least one label +## from `matchExpressions` in order to be matched by Prometheus +## Ref: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels +rulesSelector: {} + # rulesSelector: { + # matchExpressions: [{key: prometheus, operator: In, values: [example-rules, example-rules-2]}] + # } + ### OR + # rulesSelector: { + # matchLabels: {role: example-rules} + # } + +## List of Secrets in the same namespace as the Prometheus +## object, which shall be mounted into the Prometheus Pods. 
+## Ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#prometheusspec +## +secrets: [] + +serviceMonitorNamespaceSelector: {} + +## Service monitors selector +## Ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/design.md +## +serviceMonitorsSelector: {} + +logLevel: "info" + +## Prometheus StorageSpec for persistent data +## Ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/user-guides/storage.md +## +storageSpec: {} +# storageClassName: gluster +# accessModes: ["ReadWriteOnce"] +# resources: +# requests: +# storage: 50Gi +# selector: {} + +## Easy way to create persistent data +## +persistence: {} +# enabled: true +# storageClass: gluster +# accessMode: "ReadWriteOnce" +# size: 50Gi + +## Prometheus AdditionalScrapeConfigs +## Ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#prometheusspec +## +additionalScrapeConfigsEnabled: false +additionalScrapeConfigs: [] +# - job_name: "prometheus" +# static_configs: +# - targets: +# - "localhost:9090" + +## Prometheus AdditionalAlertManagerConfigs +## Ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#prometheusspec +## +additionalAlertManagerConfigsEnabled: false +additionalAlertManagerConfigs: {} +# static_configs: +# - targets: +# - "localhost:9093" + +listenLocal: false + +sidecarsSpec: [] +# - name: sidecar +# image: registry/name:tag diff --git a/charts/rancher-monitoring/v0.0.1/questions.yml b/charts/rancher-monitoring/v0.0.1/questions.yml new file mode 100644 index 0000000000000000000000000000000000000000..e56c9d5a19878133efa3c3f475cee5b7ae91ac58 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/questions.yml @@ -0,0 +1,4 @@ +categories: +- Monitoring + +questions: diff --git a/charts/rancher-monitoring/v0.0.1/requirements.yaml b/charts/rancher-monitoring/v0.0.1/requirements.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..ce2be58840f23c9b29d2023d949b6c67aeddafa7 --- /dev/null +++ b/charts/rancher-monitoring/v0.0.1/requirements.yaml @@ -0,0 +1,75 @@ +dependencies: + - name: alertmanager + version: 0.0.1 + condition: alertmanager.enabled + repository: "file://./charts/alertmanager/" + + - name: exporter-coredns + version: 0.0.1 + condition: exporter-coredns.enabled + repository: "file://./charts/exporter-coredns/" + + - name: exporter-kube-controller-manager + version: 0.0.1 + condition: exporter-kube-controller-manager.enabled + repository: "file://./charts/exporter-kube-controller-manager/" + + - name: exporter-kube-dns + version: 0.0.1 + condition: exporter-kube-dns.enabled + repository: "file://./charts/exporter-kube-dns/" + + - name: exporter-kube-etcd + version: 0.0.1 + condition: exporter-kube-etcd.enabled + repository: "file://./charts/exporter-kube-etcd/" + + - name: exporter-kube-scheduler + version: 0.0.1 + condition: exporter-kube-scheduler.enabled + repository: "file://./charts/exporter-kube-scheduler/" + + - name: exporter-kube-state + version: 0.0.1 + condition: exporter-kube-state.enabled + repository: "file://./charts/exporter-kube-state/" + + - name: exporter-kubelets + version: 0.0.1 + condition: exporter-kubelets.enabled + repository: "file://./charts/exporter-kubelets/" + + - name: exporter-kubernetes + version: 0.0.1 + condition: exporter-kubernetes.enabled + repository: "file://./charts/exporter-kubernetes/" + + - name: exporter-node + version: 0.0.1 + condition: exporter-node.enabled + repository: "file://./charts/exporter-node/" + + - name: grafana + version: 0.0.1 + condition: grafana.enabled + repository: "file://./charts/grafana/" + + - name: prometheus + version: 0.0.1 + condition: prometheus.enabled + repository: "file://./charts/prometheus/" + + - name: metric-expression-project + version: 0.0.1 + condition: metric-expression-project.enabled + repository: "file://./charts/metric-expression-project/" + + - name: 
metric-expression-cluster
+    version: 0.0.1
+    condition: metric-expression-cluster.enabled
+    repository: "file://./charts/metric-expression-cluster/"
+
+  - name: exporter-fluentd
+    version: 0.0.1
+    condition: exporter-fluentd.enabled
+    repository: "file://./charts/exporter-fluentd/"
\ No newline at end of file
diff --git a/charts/rancher-monitoring/v0.0.1/templates/_helpers.tpl b/charts/rancher-monitoring/v0.0.1/templates/_helpers.tpl
new file mode 100644
index 0000000000000000000000000000000000000000..ea9f31f283557032d0c8d332ac12dbb53b9326b6
--- /dev/null
+++ b/charts/rancher-monitoring/v0.0.1/templates/_helpers.tpl
@@ -0,0 +1,125 @@
+{{/* vim: set filetype=mustache: */}}
+{{/* charts.exporter-kubelets.fullname: renders "exporter-kubelets-<release name>". */}}
+{{- define "charts.exporter-kubelets.fullname" -}}
+{{- printf "exporter-kubelets-%s" .Release.Name -}}
+{{- end -}}
+
+{{/* charts.prometheus.serviceaccount.fullname: renders "prometheus-<release name>". */}}
+{{- define "charts.prometheus.serviceaccount.fullname" -}}
+{{- printf "prometheus-%s" .Release.Name -}}
+{{- end -}}
+
+{{/* app.name: .Values.nameOverride when set, otherwise .Chart.Name. */}}
+{{- define "app.name" -}}
+{{- default .Chart.Name .Values.nameOverride -}}
+{{- end -}}
+
+{{/* app.version: renders "<app.name>-<chart version>", with "+" in the version replaced by "_". */}}
+{{- define "app.version" -}}
+{{- $name := include "app.name" . -}}
+{{- $version := .Chart.Version | replace "+" "_" -}}
+{{- printf "%s-%s" $name $version -}}
+{{- end -}}
+
+{{/* app.fullname: renders "<app.name>-<release name>". */}}
+{{- define "app.fullname" -}}
+{{- $name := include "app.name" . -}}
+{{- printf "%s-%s" $name .Release.Name -}}
+{{- end -}}
+
+{{/* app.dnsname: app.fullname truncated to 63 characters with a trailing "-" trimmed. */}}
+{{- define "app.dnsname" -}}
+{{- include "app.fullname" . | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+
+{{/* app.psp.fullname: renders "<app.name>-<release name>-psp". */}}
+{{- define "app.psp.fullname" -}}
+{{- $name := include "app.name" . -}}
+{{- printf "%s-%s-psp" $name .Release.Name -}}
+{{- end -}}
+
+{{/* app.nginx.fullname: renders "<app.name>-<release name>-nginx". */}}
+{{- define "app.nginx.fullname" -}}
+{{- $name := include "app.name" . -}}
+{{- printf "%s-%s-nginx" $name .Release.Name -}}
+{{- end -}}
+
+{{/* app.dashboards.fullname: renders "<app.name>-<release name>-dashboards". */}}
+{{- define "app.dashboards.fullname" -}}
+{{- $name := include "app.name" . -}}
+{{- printf "%s-%s-dashboards" $name .Release.Name -}}
+{{- end -}}
+
+{{/* app.hooks.fullname: renders "<app.name>-<release name>-hooks". */}}
+{{- define "app.hooks.fullname" -}}
+{{- $name := include "app.name" .
-}}
+{{- printf "%s-%s-hooks" $name .Release.Name -}}
+{{- end -}}
+
+{{/* app.cleanup.fullname: renders "<app.name>-<release name>-cleanup". */}}
+{{- define "app.cleanup.fullname" -}}
+{{- $name := include "app.name" . -}}
+{{- printf "%s-%s-cleanup" $name .Release.Name -}}
+{{- end -}}
+
+{{/* kube_version: renders "<major>.<minor>" from .Capabilities.KubeVersion. */}}
+{{- define "kube_version" -}}
+{{- printf "%s.%s" .Capabilities.KubeVersion.Major .Capabilities.KubeVersion.Minor -}}
+{{- end -}}
+
+{{/* operator_api_version: renders "<apiGroup>/v1"; the default is applied to the group BEFORE formatting so an unset or empty .Values.apiGroup falls back to "monitoring.coreos.com". */}}
+{{- define "operator_api_version" -}}
+{{- printf "%s/v1" (default "monitoring.coreos.com" .Values.apiGroup) -}}
+{{- end -}}
+
+{{/* operator_api_group: the part of operator_api_version before the first "/". */}}
+{{- define "operator_api_group" -}}
+{{- $apiVersion := include "operator_api_version" . -}}
+{{- index (regexSplit "/" $apiVersion 2) 0 | printf "%s" -}}
+{{- end -}}
+
+{{/* deployment_api_version: newest Deployment API the cluster advertises (apps/v1, apps/v1beta2, apps/v1beta1), else extensions/v1beta1. Each branch returns the version it just tested. */}}
+{{- define "deployment_api_version" -}}
+{{- if .Capabilities.APIVersions.Has "apps/v1" -}}
+{{- "apps/v1" -}}
+{{- else if .Capabilities.APIVersions.Has "apps/v1beta2" -}}
+{{- "apps/v1beta2" -}}
+{{- else if .Capabilities.APIVersions.Has "apps/v1beta1" -}}
+{{- "apps/v1beta1" -}}
+{{- else -}}
+{{- "extensions/v1beta1" -}}
+{{- end -}}
+{{- end -}}
+
+{{/* statefulset_api_version: apps/v1 or apps/v1beta2 when advertised, else apps/v1beta1. */}}
+{{- define "statefulset_api_version" -}}
+{{- if .Capabilities.APIVersions.Has "apps/v1" -}}
+{{- "apps/v1" -}}
+{{- else if .Capabilities.APIVersions.Has "apps/v1beta2" -}}
+{{- "apps/v1beta2" -}}
+{{- else -}}
+{{- "apps/v1beta1" -}}
+{{- end -}}
+{{- end -}}
+
+{{/* daemonset_api_version: apps/v1 or apps/v1beta2 when advertised, else extensions/v1beta1. */}}
+{{- define "daemonset_api_version" -}}
+{{- if .Capabilities.APIVersions.Has "apps/v1" -}}
+{{- "apps/v1" -}}
+{{- else if .Capabilities.APIVersions.Has "apps/v1beta2" -}}
+{{- "apps/v1beta2" -}}
+{{- else -}}
+{{- "extensions/v1beta1" -}}
+{{- end -}}
+{{- end -}}
+
+{{/* rbac_api_version: rbac.authorization.k8s.io/v1 or v1beta1 when advertised, else v1alpha1. */}}
+{{- define "rbac_api_version" -}}
+{{- if .Capabilities.APIVersions.Has "rbac.authorization.k8s.io/v1" -}}
+{{- "rbac.authorization.k8s.io/v1" -}}
+{{- else if .Capabilities.APIVersions.Has "rbac.authorization.k8s.io/v1beta1" -}}
+{{- "rbac.authorization.k8s.io/v1beta1" -}}
+{{- else -}}
+{{- "rbac.authorization.k8s.io/v1alpha1" -}}
+{{- end -}}
+{{- end -}}
diff --git a/charts/rancher-monitoring/v0.0.1/templates/deployment.yaml
b/charts/rancher-monitoring/v0.0.1/templates/deployment.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..814c6ebb38f1c939124453b7d37417d20e91b392
--- /dev/null
+++ b/charts/rancher-monitoring/v0.0.1/templates/deployment.yaml
@@ -0,0 +1,55 @@
+{{- if .Values.enabled }}{{/* Deployment running the prometheus-operator container; rendered only when .Values.enabled is truthy. */}}
+apiVersion: {{ template "deployment_api_version" . }}
+kind: Deployment
+metadata:
+  labels:
+    app: {{ template "app.name" . }}
+    chart: {{ template "app.version" . }}
+    heritage: {{ .Release.Service }}
+    release: {{ .Release.Name }}
+  name: {{ template "app.fullname" . }}
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: {{ template "app.name" . }}
+      chart: {{ template "app.version" . }}{{/* NOTE(review): app.version embeds .Chart.Version, so this selector differs per chart version; confirm upgrades are not blocked by selector immutability. */}}
+      release: {{ .Release.Name }}
+  template:
+    metadata:
+      labels:
+        app: {{ template "app.name" . }}
+        chart: {{ template "app.version" . }}
+        release: {{ .Release.Name }}
+    spec:
+      containers:
+        - name: prometheus-operator
+          image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}"
+          args:
+            - --kubelet-service={{ .Release.Namespace }}/expose-kubelets-metrics
+            - --log-format={{ .Values.logFormat }}
+            - --log-level={{ .Values.logLevel }}
+            - --prometheus-config-reloader={{ .Values.image.prometheusConfigReloader.repository }}:{{ .Values.image.prometheusConfigReloader.tag }}
+            - --config-reloader-image={{ .Values.image.configmapReload.repository }}:{{ .Values.image.configmapReload.tag }}
+            - --labels=monitoring.cattle.io=true
+            - --crd-apigroup={{ template "operator_api_group" .
}}
+            - --manage-crds={{ .Values.manageCRDs }}
+            - --with-validation={{ .Values.withValidation }}
+            - --disable-auto-user-group={{ .Values.disableAutoUserGroup }}
+          ports:
+            - containerPort: 8080
+              name: http
+          resources:
+{{ toYaml .Values.resources | indent 12 }}
+      {{- if .Values.nodeSelector }}
+      nodeSelector:
+{{ toYaml .Values.nodeSelector | indent 8 }}
+      {{- end }}
+      {{- if .Values.enabledRBAC }}{{/* serviceAccountName is emitted only when enabledRBAC is truthy; its value comes straight from .Values.serviceAccountName. */}}
+      serviceAccountName: {{ .Values.serviceAccountName }}
+      {{- end }}
+      {{- if .Values.tolerations }}
+      tolerations:
+{{ toYaml .Values.tolerations | indent 8 }}
+      {{- end }}
+{{- end }}
diff --git a/charts/rancher-monitoring/v0.0.1/templates/metrics-service.yaml b/charts/rancher-monitoring/v0.0.1/templates/metrics-service.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..bd82dacc41912bf77a2b787e54798c71d6100296
--- /dev/null
+++ b/charts/rancher-monitoring/v0.0.1/templates/metrics-service.yaml
@@ -0,0 +1,22 @@
+{{- if .Values.enabled }}{{/* ClusterIP Service "expose-operator-metrics" forwarding port 8080 to the pods' "http" port; rendered only when .Values.enabled is truthy. */}}
+apiVersion: v1
+kind: Service
+metadata:
+  name: expose-operator-metrics
+  labels:
+    app: {{ template "app.name" . }}
+    chart: {{ template "app.version" . }}
+    heritage: {{ .Release.Service }}
+    release: {{ .Release.Name }}
+    monitoring.cattle.io: "true"
+spec:
+  type: ClusterIP
+  selector:
+    app: {{ template "app.name" . }}
+    chart: {{ template "app.version" . }}
+    release: {{ .Release.Name }}
+  ports:
+    - name: http
+      port: 8080
+      targetPort: http
+{{- end }}
\ No newline at end of file
diff --git a/charts/rancher-monitoring/v0.0.1/templates/servicemonitor.yaml b/charts/rancher-monitoring/v0.0.1/templates/servicemonitor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d4914b09c764d7444c71f8a5e262e2c9187f95e3
--- /dev/null
+++ b/charts/rancher-monitoring/v0.0.1/templates/servicemonitor.yaml
@@ -0,0 +1,30 @@
+{{- if .Values.enabled }}{{/* ServiceMonitor for the operator's own metrics Service; rendered only when .Values.enabled is truthy. */}}
+apiVersion: {{ template "operator_api_version" . }}
+kind: ServiceMonitor
+metadata:
+  labels:
+    app: {{ template "app.name" .
}}
+    chart: {{ template "app.version" . }}
+    heritage: {{ .Release.Service }}
+    release: {{ .Release.Name }}
+    source: rancher-monitoring
+    {{- if .Values.serviceMonitor.labels }}{{/* extra labels from values are merged into metadata.labels */}}
+{{ toYaml .Values.serviceMonitor.labels | indent 4 }}
+    {{- end }}
+  name: {{ template "app.fullname" . }}
+spec:
+  jobLabel: prometheus-operator
+  selector:
+    matchLabels:
+      app: {{ template "app.name" . }}
+      chart: {{ template "app.version" . }}
+      release: {{ .Release.Name }}
+      monitoring.cattle.io: "true"
+  namespaceSelector:
+    matchNames:
+      - {{ .Release.Namespace | quote }}
+  endpoints:
+    - port: http
+      interval: 30s
+      honorLabels: true
+{{- end }}
diff --git a/charts/rancher-monitoring/v0.0.1/values.yaml b/charts/rancher-monitoring/v0.0.1/values.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c3cd5b074025e9089741ac23bf7a650efa47e2bd
--- /dev/null
+++ b/charts/rancher-monitoring/v0.0.1/values.yaml
@@ -0,0 +1,311 @@
+enabled: false  # master switch: the operator Deployment, Service and ServiceMonitor templates render only when true
+
+nameOverride: "prometheus-operator"  # used instead of .Chart.Name by the app.name helper
+
+enabledRBAC: true  # when true, the Deployment sets serviceAccountName from the serviceAccountName value
+
+## API group of the operator CRDs: passed to the operator as --crd-apigroup
+## and used (as "<apiGroup>/v1") for the ServiceMonitor apiVersion
+apiGroup: "monitoring.coreos.com"
+
+## Prometheus-operator image
+##
+image:
+  # Reference to one or more secrets to be used when pulling images
+  pullSecrets: []
+  repository: quay.io/coreos/prometheus-operator
+  tag: v0.23.2
+  ## Prometheus-config-reloader image to use for config and rule reloading
+  ## (passed to the operator as --prometheus-config-reloader)
+  prometheusConfigReloader:
+    repository: quay.io/coreos/prometheus-config-reloader
+    tag: v0.23.2
+  ## Configmap-reload image to use for reloading configmaps
+  ## (passed to the operator as --config-reloader-image)
+  configmapReload:
+    repository: quay.io/coreos/configmap-reload
+    tag: v0.0.1
+
+## Node labels for prometheus-operator pod assignment
+##
+nodeSelector:
+  beta.kubernetes.io/os: linux
+
+## Tolerations for use with node taints
+## Ref: https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/
+## Must be a list of toleration objects; it is copied as-is under the pod spec's "tolerations".
+tolerations: []
+
+logFormat: "logfmt"
+
+logLevel: "info"
+
+manageCRDs: false
+
+withValidation: true
+
+disableAutoUserGroup: false
+
+serviceMonitor:
+  ## Custom Labels to
be added to ServiceMonitor
+  ##
+  labels: {}
+
+## Prometheus-operator resource limits & requests
+## Ref: https://kubernetes.io/docs/user-guide/compute-resources/
+##
+resources: {}
+  # limits:
+  #   cpu: 200m
+  #   memory: 100Mi
+  # requests:
+  #   cpu: 100m
+  #   memory: 50Mi
+
+## Name of an already existing ServiceAccount
+## (only applied to the operator pod when enabledRBAC is true)
+serviceAccountName: ""
+
+alertmanager:
+  enabled: false
+  apiGroup: "monitoring.coreos.com"
+  image:
+    repository: quay.io/prometheus/alertmanager
+    tag: v0.15.2
+  nodeSelector:
+    beta.kubernetes.io/os: linux
+  config:
+    global:
+      resolve_timeout: 5m
+    route:
+      group_by: ['job']
+      group_wait: 30s
+      group_interval: 5m
+      repeat_interval: 12h
+      receiver: 'null'
+      routes:
+        - match:
+            alertname: DeadMansSwitch
+          receiver: 'null'
+    receivers:
+      - name: 'null'
+  persistence:
+    enabled: false
+    storageClass: ""
+    accessMode: "ReadWriteOnce"
+    size: 50Gi
+  ## Name of an already existing ServiceAccount
+  ##
+  serviceAccountName: ""
+
+exporter-coredns:
+  enabled: false
+  apiGroup: "monitoring.coreos.com"
+  endpoints: []
+  ports:
+    metrics:
+      port: 9153
+
+exporter-kube-controller-manager:
+  enabled: false
+  apiGroup: "monitoring.coreos.com"
+  endpoints: []
+  ports:
+    metrics:
+      port: 10252
+
+exporter-kube-dns:
+  enabled: false
+  apiGroup: "monitoring.coreos.com"
+  endpoints: []
+  ports:
+    metrics:
+      dnsmasq:
+        port: 10054
+      skydns:
+        port: 10055
+
+exporter-kube-etcd:
+  enabled: false
+  apiGroup: "monitoring.coreos.com"
+  endpoints: []
+  ports:
+    metrics:
+      scheme: "https"
+      port: 4001
+      caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt  # NOTE(review): nesting of caFile/certFile/keyFile was reconstructed from flattened text; confirm against the exporter-kube-etcd subchart
+      certFile: ""
+      keyFile: ""
+
+exporter-kube-scheduler:
+  enabled: false
+  apiGroup: "monitoring.coreos.com"
+  endpoints: []
+  ports:
+    metrics:
+      port: 10251
+
+exporter-kube-state:
+  enabled: false
+  apiGroup: "monitoring.coreos.com"
+  ports:
+    metrics:
+      port: 8080
+  image:
+    repository: quay.io/coreos/kube-state-metrics
+    tag: v1.4.0
+  nodeSelector:
+    beta.kubernetes.io/os: linux
+  ## Name of an already existing ServiceAccount
+  ##
+
serviceAccountName: ""
+
+exporter-kubelets:
+  enabled: false
+  apiGroup: "monitoring.coreos.com"
+
+exporter-kubernetes:
+  enabled: false
+  apiGroup: "monitoring.coreos.com"
+
+exporter-node:
+  enabled: false
+  apiGroup: "monitoring.coreos.com"
+  image:
+    repository: quay.io/prometheus/node-exporter
+    tag: v0.16.0
+  nodeSelector:
+    beta.kubernetes.io/os: linux
+  ports:
+    metrics:
+      port: 9100
+  ## Name of an already existing ServiceAccount
+  ##
+  serviceAccountName: ""
+
+grafana:
+  enabled: false
+  level: cluster
+  apiGroup: "monitoring.coreos.com"
+  image:
+    repository: grafana/grafana
+    tag: 5.3.0
+  nodeSelector:
+    beta.kubernetes.io/os: linux
+  persistence:
+    enabled: false
+    storageClass: ""
+    accessMode: "ReadWriteOnce"
+    size: 50Gi
+  adminUser: "admin"
+  adminPassword: "admin"  # NOTE(review): well-known default credential committed in values; override per installation
+  ## Name of an already existing ServiceAccount
+  ##
+  serviceAccountName: ""
+  prometheusDatasourceURL: "http://prometheus-operated:9090"
+
+prometheus:
+  enabled: false
+  apiGroup: "monitoring.coreos.com"
+  image:
+    ## Reference to one or more secrets to be used when pulling images
+    ##
+    pullSecrets: []
+    repository: quay.io/prometheus/prometheus
+    tag: v2.4.3
+    inits:  # NOTE(review): placement of "inits" (under image vs. directly under prometheus) was reconstructed from flattened text; confirm against the prometheus subchart
+      tools:
+        repository: maiwj/curl
+        tag: 7.56.1-r0
+  nodeSelector:
+    beta.kubernetes.io/os: linux
+  persistence:
+    enabled: false
+    storageClass: ""
+    accessMode: "ReadWriteOnce"
+    size: 50Gi
+  alertingEndpoints: []
+  secrets: []
+  ## Name of an already existing ServiceAccount
+  ##
+  serviceAccountName: ""
+  sidecarsSpec:
+    - args:
+        - --log.debug
+        - agent
+        - start
+        - --agent.proxy-url
+        - http://localhost:9090
+        - --listen.address
+        - $(POD_IP):9090
+      env:
+        - name: POD_IP
+          valueFrom:
+            fieldRef:
+              fieldPath: status.podIP
+      image: maiwj/prometheus-auth:0.1.0
+      livenessProbe:
+        failureThreshold: 6
+        httpGet:
+          path: /-/healthy
+          port: web
+          scheme: HTTP
+        periodSeconds: 5
+        successThreshold: 1
+        timeoutSeconds: 3
+      name: prometheus-agent
+      ports:
+        - containerPort: 9090
+          name: web
+          protocol: TCP
+      readinessProbe:
+        failureThreshold: 120
+        httpGet:
+
path: /-/ready
+          port: web
+          scheme: HTTP
+        periodSeconds: 5
+        successThreshold: 1
+        timeoutSeconds: 3
+  listenLocal: true
+  serviceMonitorsSelector:
+    matchExpressions:  # NOTE(review): bare key parses as null; confirm this is intentional and how the prometheus subchart treats it
+  additionalScrapeConfigsEnabled: true
+  additionalScrapeConfigs:
+    - job_name: 'ingress-nginx-endpoints'  # pod discovery limited to the ingress-nginx and kube-system namespaces
+      kubernetes_sd_configs:
+        - role: pod
+          namespaces:
+            names:
+              - ingress-nginx
+              - kube-system
+      relabel_configs:
+        - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
+          action: keep
+          regex: true
+        - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scheme]
+          action: replace
+          target_label: __scheme__
+          regex: (https?)
+        - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
+          action: replace
+          target_label: __metrics_path__
+          regex: (.+)
+        - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
+          action: replace
+          target_label: __address__
+          regex: ([^:]+)(?::\d+)?;(\d+)
+          replacement: $1:$2
+        - source_labels: [__meta_kubernetes_service_name]
+          regex: prometheus-operated
+          action: drop
+
+metric-expression-project:
+  enabled: false
+
+metric-expression-cluster:
+  enabled: false
+
+exporter-fluentd:
+  enabled: false
+  apiGroup: "monitoring.coreos.com"