Commit fe313655 authored by frank

Add Rancher-Monitoring Chart

        (+) Only use for Rancher 2.0 Monitoring and Alerting
        (+) Support Grafana to proxy with authorization bearer token to
        Prometheus-Auth agent
        (+) Support Prometheus web to proxy with authorization bearer token
        to Prometheus-Auth agent
        (+) Rich metrics for Kubernetes and Rancher
Co-authored-by: aiwantaozi <michelia.feng@gmail.com>
Co-authored-by: orangedeng <jxfa0043379@hotmail.com>
parent 0d9b4023
# ServiceMonitor for this release: selects Services in the release namespace
# carrying the app/chart/release labels plus monitoring.cattle.io: "true",
# and scrapes their `web` port every 30s.
apiVersion: {{ template "operator_api_version" . }}
kind: ServiceMonitor
metadata:
  labels:
    app: {{ template "app.name" . }}
    chart: {{ template "app.version" . }}
    heritage: {{ .Release.Service }}
    release: {{ .Release.Name }}
    source: rancher-monitoring
    {{- if .Values.serviceMonitor.labels }}
{{ toYaml .Values.serviceMonitor.labels | indent 4 }}
    {{- end }}
  name: {{ template "app.fullname" . }}
spec:
  jobLabel: grafana
  selector:
    matchLabels:
      app: {{ template "app.name" . }}
      chart: {{ template "app.version" . }}
      release: {{ .Release.Name }}
      monitoring.cattle.io: "true"
  namespaceSelector:
    matchNames:
      - {{ .Release.Namespace | quote }}
  endpoints:
    - port: web
      interval: 30s
## Monitoring level
##
level: cluster

## RBAC / PodSecurityPolicy toggles (consumed by templates not shown here)
##
enabledRBAC: true
enabledPSP: true

## Name of an already existing ServiceAccount
## (presumably used instead of creating one - confirm against the templates)
##
serviceAccountName: ''

## CRD apiGroup
##
apiGroup: 'monitoring.coreos.com'
## Node labels for Grafana pod assignment
## Ref: https://kubernetes.io/docs/user-guide/node-selection/
##
nodeSelector: {}

## Tolerations for use with node taints (a list of toleration entries,
## as in the commented example below)
## Ref: https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/
##
tolerations: []
#  - key: "key"
#    operator: "Equal"
#    value: "value"
#    effect: "NoSchedule"
## ServiceMonitor settings
##
serviceMonitor:
  ## Custom labels to be added to the ServiceMonitor
  ## (a mapping of label name to label value; empty by default)
  ##
  labels: {}
## Pass extra environment variables to the Grafana container.
## Expected shape is a list of name/value entries, e.g.:
##
# extraVars:
#   - name: EXTRA_VAR_1
#     value: extra-var-value-1
#   - name: EXTRA_VAR_2
#     value: extra-var-value-2
extraVars: []

## Admin account (presumably the Grafana admin login - confirm against the
## templates).
## NOTE(review): these are well-known default credentials; override both
## values for any real deployment instead of relying on the defaults.
##
adminUser: "admin"
adminPassword: "admin"
## Grafana Docker image
##
image:
  repository: grafana/grafana
  tag: "5.3.0"

## Image for the `tools` init entry
## NOTE(review): nesting of `inits` was reconstructed as a top-level key -
## confirm against the templates that read it.
##
inits:
  tools:
    repository: maiwj/curl
    tag: "7.56.1-r0"
## Storage spec; empty by default. Example:
##
storageSpec: {}
#  storageClassName: default
#  accessModes:
#    - ReadWriteOnce
#  resources:
#    requests:
#      storage: 2Gi
#  selector: {}

## Easy way to create persistent data
##
persistence: {}
#  enabled: true
#  storageClass: gluster
#  accessMode: "ReadWriteOnce"
#  size: 50Gi
## Resource limits & requests
## Ref: https://kubernetes.io/docs/user-guide/compute-resources/
##
resources: {}
#  limits:
#    memory: 200Mi
#    cpu: 200m
#  requests:
#    memory: 100Mi
#    cpu: 100m

## A list of additional configmaps that contain -dashboard.json and/or
## -datasource.json files that should be imported into grafana.
##
dashboardConfigmaps: []

## Prometheus datasource URL; empty by default
##
prometheusDatasourceURL: ''
## Image used for the Grafana proxy
##
grafanaProxy:
  repository: nginx
  tag: "1.15.2"

## Image and resources used for the Grafana watcher
##
grafanaWatcher:
  repository: quay.io/coreos/grafana-watcher
  tag: v0.0.8

  ## Resource limits & requests
  ## Ref: https://kubernetes.io/docs/user-guide/compute-resources/
  ## Nested here so it does not collide with the top-level `resources` key.
  ##
  resources: {}
  #  requests:
  #    memory: "16Mi"
  #    cpu: "50m"
  #  limits:
  #    memory: "32Mi"
  #    cpu: "100m"
# Helm chart metadata for the metric-expression-cluster chart.
apiVersion: v1
name: metric-expression-cluster
version: 0.0.1
description: Creates Metrics CRD of Rancher monitoring graph
engine: gotpl
maintainers:
  - name: aiwantaozi
    email: michelia.feng@gmail.com
# Summary series: average of latencies_sum / latencies_count per instance,
# divided by 1e+06.
# NOTE(review): the metric name says milliseconds; confirm the unit of
# apiserver_request_latencies_* and that /1e+06 is the intended divisor.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: apiserver-request-latency-milliseconds-avg
  labels:
    app: metric-expression
    component: apiserver
    details: "false"
    level: cluster
    metric: request-latency-milliseconds-avg
    source: rancher-monitoring
spec:
  expression: >-
    avg(apiserver_request_latencies_sum / apiserver_request_latencies_count)
    by (instance) /1e+06
  legendFormat: '[[instance]]'
  description: apiserver request latency milliseconds avg
---
# Detail series: same latency average, grouped by instance and verb.
# NOTE(review): the metric name says milliseconds; confirm the unit of
# apiserver_request_latencies_* and that /1e+06 is the intended divisor.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: apiserver-request-latency-milliseconds-avg-details
  labels:
    app: metric-expression
    component: apiserver
    details: "true"
    level: cluster
    metric: request-latency-milliseconds-avg
    source: rancher-monitoring
spec:
  expression: >-
    avg(apiserver_request_latencies_sum / apiserver_request_latencies_count)
    by (instance, verb) /1e+06
  legendFormat: '[[verb]]([[instance]])'
  description: apiserver request latency milliseconds avg
---
# Summary series: 5m rate of apiserver_request_count, summed per instance.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: apiserver-request-count-sum-rate
  labels:
    app: metric-expression
    component: apiserver
    details: "false"
    graph: request-count
    level: cluster
    metric: request-count-sum-rate
    source: rancher-monitoring
spec:
  expression: >-
    sum(rate(apiserver_request_count[5m])) by (instance)
  legendFormat: '[[instance]]'
  description: apiserver request count sum rate
---
# Detail series: 5m rate of apiserver_request_count, summed per instance
# and response code.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: apiserver-request-count-sum-rate-details
  labels:
    app: metric-expression
    component: apiserver
    details: "true"
    graph: request-count
    level: cluster
    metric: request-count-sum-rate
    source: rancher-monitoring
spec:
  expression: >-
    sum(rate(apiserver_request_count[5m])) by (instance, code)
  legendFormat: '[[code]]([[instance]])'
  description: apiserver request count sum rate
---
# Summary series: 5m rate of requests whose code does not match "2..",
# summed per instance.
# NOTE(review): this expression contains a "$instance" placeholder while the
# other apiserver expressions do not - confirm it is substituted upstream.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: apiserver-request-error-count-sum-rate
  labels:
    app: metric-expression
    component: apiserver
    details: "false"
    graph: request-count
    level: cluster
    metric: request-error-count-sum-rate
    source: rancher-monitoring
spec:
  expression: >-
    sum(rate(apiserver_request_count{instance=~"$instance", code!~"2.."}[5m]))
    by (instance)
  legendFormat: '[[instance]]'
  description: apiserver request error count sum rate
---
# Detail series: 5m rate of requests whose code does not match "2..",
# summed per instance and code.
# NOTE(review): this expression contains a "$instance" placeholder while the
# other apiserver expressions do not - confirm it is substituted upstream.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: apiserver-request-error-count-sum-rate-details
  labels:
    app: metric-expression
    component: apiserver
    details: "true"
    graph: request-count
    level: cluster
    metric: request-error-count-sum-rate
    source: rancher-monitoring
spec:
  expression: >-
    sum(rate(apiserver_request_count{instance=~"$instance", code!~"2.."}[5m]))
    by (instance, code)
  legendFormat: '[[code]]([[instance]])'
  description: apiserver request error count sum rate
---
# Summary series: 5m rate of node_disk_read_bytes_total summed over all
# series, scaled by 8 / 1024.
# The redundant empty `by ()` grouping was dropped so the expression has the
# same shape as the cluster-disk-io-writes-bytes-sum-rate summary expression.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: cluster-disk-io-reads-bytes-sum-rate
  labels:
    app: metric-expression
    component: cluster
    details: "false"
    graph: disk-io
    level: cluster
    metric: disk-io-reads-bytes-sum-rate
    source: rancher-monitoring
spec:
  expression: >-
    sum(rate(node_disk_read_bytes_total[5m]))
    * 8 / 1024
  legendFormat: Read
  description: cluster disk io reads bytes sum rate
---
# Detail series: 5m rate of node_disk_read_bytes_total summed per instance,
# scaled by 8 / 1024.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: cluster-disk-io-reads-bytes-sum-rate-details
  labels:
    app: metric-expression
    component: cluster
    details: "true"
    graph: disk-io
    level: cluster
    metric: disk-io-reads-bytes-sum-rate
    source: rancher-monitoring
spec:
  expression: >-
    sum(rate(node_disk_read_bytes_total[5m]))
    by (instance) * 8 / 1024
  legendFormat: Read([[instance]])
  description: cluster disk io reads bytes sum rate
---
# Summary series: 5m rate of transmitted bytes over all devices not matching
# the excluded device pattern, scaled by 8 / 1024.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: cluster-network-transmit-bytes-sum
  labels:
    app: metric-expression
    component: cluster
    details: "false"
    graph: network-io
    level: cluster
    metric: network-transmit-bytes-sum
    source: rancher-monitoring
spec:
  expression: >-
    sum(rate(node_network_transmit_bytes_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*"}[5m]))
    * 8 / 1024
  legendFormat: Transmit
  description: cluster network transmit bytes sum rate
---
# Detail series: 5m rate of transmitted bytes per instance (excluded device
# pattern applied), scaled by 8 / 1024.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: cluster-network-transmit-bytes-sum-details
  labels:
    app: metric-expression
    component: cluster
    details: "true"
    graph: network-io
    level: cluster
    metric: network-transmit-bytes-sum
    source: rancher-monitoring
spec:
  expression: >-
    sum(rate(node_network_transmit_bytes_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*"}[5m]))
    by (instance) * 8 / 1024
  legendFormat: Transmit([[instance]])
  description: cluster network transmit bytes sum rate
---
# Summary series: total node_load5 divided by the number of
# node_cpu_seconds_total{mode="system"} series.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: cluster-cpu-load-5
  labels:
    app: metric-expression
    component: cluster
    details: "false"
    graph: cpu-load
    level: cluster
    metric: cpu-load-5
    source: rancher-monitoring
spec:
  expression: >-
    sum(node_load5) / count(node_cpu_seconds_total{mode="system"})
  legendFormat: Load5
  description: cluster cpu load 5
---
# Detail series: per-instance node_load5 divided by the per-instance count
# of node_cpu_seconds_total{mode="system"} series.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: cluster-cpu-load-5-details
  labels:
    app: metric-expression
    component: cluster
    details: "true"
    graph: cpu-load
    level: cluster
    metric: cpu-load-5
    source: rancher-monitoring
spec:
  expression: >-
    sum(node_load5) by (instance)
    / count(node_cpu_seconds_total{mode="system"})
    by (instance)
  legendFormat: Load5([[instance]])
  description: cluster cpu load 5
---
# Summary series: total node_load1 divided by the number of
# node_cpu_seconds_total{mode="system"} series.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: cluster-cpu-load-1
  labels:
    app: metric-expression
    component: cluster
    details: "false"
    graph: cpu-load
    level: cluster
    metric: cpu-load-1
    source: rancher-monitoring
spec:
  expression: >-
    sum(node_load1) / count(node_cpu_seconds_total{mode="system"})
  legendFormat: Load1
  description: cluster cpu load 1
---
# Detail series: per-instance node_load1 divided by the per-instance count
# of node_cpu_seconds_total{mode="system"} series.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: cluster-cpu-load-1-details
  labels:
    app: metric-expression
    component: cluster
    details: "true"
    graph: cpu-load
    level: cluster
    metric: cpu-load-1
    source: rancher-monitoring
spec:
  expression: >-
    sum(node_load1) by (instance)
    / count(node_cpu_seconds_total{mode="system"})
    by (instance)
  legendFormat: Load1([[instance]])
  description: cluster cpu load 1
---
# Summary series: 5m rate of node_disk_written_bytes_total summed over all
# series, scaled by 8 / 1024.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: cluster-disk-io-writes-bytes-sum-rate
  labels:
    app: metric-expression
    component: cluster
    details: "false"
    graph: disk-io
    level: cluster
    metric: disk-io-writes-bytes-sum-rate
    source: rancher-monitoring
spec:
  expression: >-
    sum(rate(node_disk_written_bytes_total[5m]))
    * 8 / 1024
  legendFormat: Write
  description: cluster disk io writes bytes sum rate
---
# Detail series: 5m rate of node_disk_written_bytes_total summed per
# instance, scaled by 8 / 1024.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: cluster-disk-io-writes-bytes-sum-rate-details
  labels:
    app: metric-expression
    component: cluster
    details: "true"
    graph: disk-io
    level: cluster
    metric: disk-io-writes-bytes-sum-rate
    source: rancher-monitoring
spec:
  expression: >-
    sum(rate(node_disk_written_bytes_total[5m]))
    by (instance) * 8 / 1024
  legendFormat: Write([[instance]])
  description: cluster disk io writes bytes sum rate
---
# Summary series: (filesystem size - filesystem free) / filesystem size,
# summed over all series whose device is not "rootfs".
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: cluster-fs-usage-percent
  labels:
    app: metric-expression
    component: cluster
    details: "false"
    level: cluster
    metric: fs-usage-percent
    source: rancher-monitoring
spec:
  expression: >-
    (sum(node_filesystem_size_bytes{device!="rootfs"})
    - sum(node_filesystem_free_bytes{device!="rootfs"})
    ) / sum(node_filesystem_size_bytes{device!="rootfs"})
  legendFormat: Disk usage
  description: cluster fs usage percent
---
# Detail series: (filesystem size - filesystem free) / filesystem size per
# instance, over series whose device is not "rootfs".
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: cluster-fs-usage-percent-details
  labels:
    app: metric-expression
    component: cluster
    details: "true"
    level: cluster
    metric: fs-usage-percent
    source: rancher-monitoring
spec:
  expression: >-
    (sum(node_filesystem_size_bytes{device!="rootfs"}) by (instance)
    - sum(node_filesystem_free_bytes{device!="rootfs"}) by (instance))
    / sum(node_filesystem_size_bytes{device!="rootfs"}) by (instance)
  legendFormat: '[[instance]]'
  description: cluster fs usage percent
---
# Summary series: 5m rate of receive errors over all devices not matching
# the excluded device pattern.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: cluster-network-receive-errors-sum
  labels:
    app: metric-expression
    component: cluster
    details: "false"
    graph: network-packet
    level: cluster
    metric: network-receive-errors-sum
    source: rancher-monitoring
spec:
  expression: >-
    sum(rate(node_network_receive_errs_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*"}[5m]))
  legendFormat: Receive errors
  description: cluster network receive errors sum
---
# Detail series: 5m rate of receive errors per instance (excluded device
# pattern applied).
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: cluster-network-receive-errors-sum-details
  labels:
    app: metric-expression
    component: cluster
    details: "true"
    graph: network-packet
    level: cluster
    metric: network-receive-errors-sum
    source: rancher-monitoring
spec:
  expression: >-
    sum(rate(node_network_receive_errs_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*"}[5m]))
    by (instance)
  legendFormat: Receive errors([[instance]])
  description: cluster network receive errors sum
---
# Summary series: total node_load15 divided by the number of
# node_cpu_seconds_total{mode="system"} series.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: cluster-cpu-load-15
  labels:
    app: metric-expression
    component: cluster
    details: "false"
    graph: cpu-load
    level: cluster
    metric: cpu-load-15
    source: rancher-monitoring
spec:
  expression: >-
    sum(node_load15) / count(node_cpu_seconds_total{mode="system"})
  legendFormat: Load15
  description: cluster cpu load 15
---
# Detail series: per-instance node_load15 divided by the per-instance count
# of node_cpu_seconds_total{mode="system"} series.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: cluster-cpu-load-15-details
  labels:
    app: metric-expression
    component: cluster
    details: "true"
    graph: cpu-load
    level: cluster
    metric: cpu-load-15
    source: rancher-monitoring
spec:
  expression: >-
    sum(node_load15) by (instance)
    / count(node_cpu_seconds_total{mode="system"})
    by (instance)
  legendFormat: Load15([[instance]])
  description: cluster cpu load 15
---
# Summary series: 5m rate of received bytes over all devices not matching
# the excluded device pattern, scaled by 8 / 1024.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: cluster-network-receive-bytes-sum
  labels:
    app: metric-expression
    component: cluster
    details: "false"
    graph: network-io
    level: cluster
    metric: network-receive-bytes-sum
    source: rancher-monitoring
spec:
  expression: >-
    sum(rate(node_network_receive_bytes_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*"}[5m]))
    * 8 / 1024
  legendFormat: Receive
  description: cluster network receive bytes sum rate
---
# Detail series: 5m rate of received bytes per instance (excluded device
# pattern applied), scaled by 8 / 1024.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: cluster-network-receive-bytes-sum-details
  labels:
    app: metric-expression
    component: cluster
    details: "true"
    graph: network-io
    level: cluster
    metric: network-receive-bytes-sum
    source: rancher-monitoring
spec:
  expression: >-
    sum(rate(node_network_receive_bytes_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*"}[5m]))
    by (instance) * 8 / 1024
  legendFormat: Receive([[instance]])
  description: cluster network receive bytes sum rate
---
# Summary series: 5m rate of received packets over all devices not matching
# the excluded device pattern.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: cluster-network-receive-packets-sum
  labels:
    app: metric-expression
    component: cluster
    details: "false"
    graph: network-packet
    level: cluster
    metric: network-receive-packets-sum
    source: rancher-monitoring
spec:
  expression: >-
    sum(rate(node_network_receive_packets_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*"}[5m]))
  legendFormat: Receive packets
  description: cluster network receive packets sum
---
# Detail series: 5m rate of received packets per instance (excluded device
# pattern applied).
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: cluster-network-receive-packets-sum-details
  labels:
    app: metric-expression
    component: cluster
    details: "true"
    graph: network-packet
    level: cluster
    metric: network-receive-packets-sum
    source: rancher-monitoring
spec:
  expression: >-
    sum(rate(node_network_receive_packets_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*"}[5m]))
    by (instance)
  legendFormat: Receive packets([[instance]])
  description: cluster network receive packets sum
---
# Summary series: 5m rate of transmit errors over all devices not matching
# the excluded device pattern.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: cluster-network-transmit-errors-sum
  labels:
    app: metric-expression
    component: cluster
    details: "false"
    graph: network-packet
    level: cluster
    metric: network-transmit-errors-sum
    source: rancher-monitoring
spec:
  expression: >-
    sum(rate(node_network_transmit_errs_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*"}[5m]))
  legendFormat: Transmit errors
  description: cluster network transmit errors sum
---
# Detail series: 5m rate of transmit errors per instance (excluded device
# pattern applied).
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: cluster-network-transmit-errors-sum-details
  labels:
    app: metric-expression
    component: cluster
    details: "true"
    graph: network-packet
    level: cluster
    metric: network-transmit-errors-sum
    source: rancher-monitoring
spec:
  expression: >-
    sum(rate(node_network_transmit_errs_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*"}[5m]))
    by (instance)
  legendFormat: Transmit errors([[instance]])
  description: cluster network transmit errors sum
---
# Summary series: 5m rate of dropped received packets over all devices not
# matching the excluded device pattern.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: cluster-network-receive-packets-dropped-sum
  labels:
    app: metric-expression
    component: cluster
    details: "false"
    graph: network-packet
    level: cluster
    metric: network-receive-packets-dropped-sum
    source: rancher-monitoring
spec:
  expression: >-
    sum(rate(node_network_receive_drop_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*"}[5m]))
  legendFormat: Receive dropped
  description: cluster network receive packets dropped sum
---
# Detail series: 5m rate of dropped received packets per instance (excluded
# device pattern applied).
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: cluster-network-receive-packets-dropped-sum-details
  labels:
    app: metric-expression
    component: cluster
    details: "true"
    graph: network-packet
    level: cluster
    metric: network-receive-packets-dropped-sum
    source: rancher-monitoring
spec:
  expression: >-
    sum(rate(node_network_receive_drop_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*"}[5m]))
    by (instance)
  legendFormat: Receive dropped([[instance]])
  description: cluster network receive packets dropped sum
---
# Summary series: 5m rate of dropped transmitted packets over all devices
# not matching the excluded device pattern.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: cluster-network-transmit-packets-dropped-sum
  labels:
    app: metric-expression
    component: cluster
    details: "false"
    graph: network-packet
    level: cluster
    metric: network-transmit-packets-dropped-sum
    source: rancher-monitoring
spec:
  expression: >-
    sum(rate(node_network_transmit_drop_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*"}[5m]))
  legendFormat: Transmit dropped
  description: cluster network transmit packets dropped sum
---
# Detail series: 5m rate of dropped transmitted packets per instance
# (excluded device pattern applied).
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: cluster-network-transmit-packets-dropped-sum-details
  labels:
    app: metric-expression
    component: cluster
    details: "true"
    graph: network-packet
    level: cluster
    metric: network-transmit-packets-dropped-sum
    source: rancher-monitoring
spec:
  expression: >-
    sum(rate(node_network_transmit_drop_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*"}[5m]))
    by (instance)
  legendFormat: Transmit dropped([[instance]])
  description: cluster network transmit packets dropped sum
---
# Summary series: 5m rate of transmitted packets over all devices not
# matching the excluded device pattern.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: cluster-network-transmit-packets-sum
  labels:
    app: metric-expression
    component: cluster
    details: "false"
    graph: network-packet
    level: cluster
    metric: network-transmit-packets-sum
    source: rancher-monitoring
spec:
  expression: >-
    sum(rate(node_network_transmit_packets_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*"}[5m]))
  legendFormat: Transmit packets
  description: cluster network transmit packets sum
---
# Detail series: 5m rate of transmitted packets per instance (excluded
# device pattern applied).
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: cluster-network-transmit-packets-sum-details
  labels:
    app: metric-expression
    component: cluster
    details: "true"
    graph: network-packet
    level: cluster
    metric: network-transmit-packets-sum
    source: rancher-monitoring
spec:
  expression: >-
    sum(rate(node_network_transmit_packets_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*"}[5m]))
    by (instance)
  legendFormat: Transmit packets([[instance]])
  description: cluster network transmit packets sum
---
# Summary series: 1 minus the average idle-mode irate of
# node_cpu_seconds_total over a 5m window.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: cluster-cpu-usage-seconds-sum-rate
  labels:
    app: metric-expression
    component: cluster
    details: "false"
    level: cluster
    metric: cpu-usage-seconds-sum-rate
    source: rancher-monitoring
spec:
  expression: >-
    1 - (avg(irate(node_cpu_seconds_total{mode="idle"}[5m])))
  legendFormat: CPU usage
  description: cluster cpu usage seconds sum rate
---
# Detail series: 1 minus the per-instance average idle-mode irate of
# node_cpu_seconds_total over a 5m window.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: cluster-cpu-usage-seconds-sum-rate-details
  labels:
    app: metric-expression
    component: cluster
    details: "true"
    level: cluster
    metric: cpu-usage-seconds-sum-rate
    source: rancher-monitoring
spec:
  expression: >-
    1 - (avg(irate(node_cpu_seconds_total{mode="idle"}[5m])) by (instance))
  legendFormat: '[[instance]]'
  description: cluster cpu usage seconds sum rate
---
# Summary series: 1 minus total MemAvailable over total MemTotal.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: cluster-memory-usage-percent
  labels:
    app: metric-expression
    component: cluster
    details: "false"
    level: cluster
    metric: memory-usage-percent
    source: rancher-monitoring
spec:
  expression: >-
    1 - sum(node_memory_MemAvailable_bytes)
    / sum(node_memory_MemTotal_bytes)
  legendFormat: Memory usage
  description: cluster memory usage percent
---
# Detail series: 1 minus MemAvailable over MemTotal, per instance.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: cluster-memory-usage-percent-details
  labels:
    app: metric-expression
    component: cluster
    details: "true"
    level: cluster
    metric: memory-usage-percent
    source: rancher-monitoring
spec:
  expression: >-
    1 - sum(node_memory_MemAvailable_bytes) by (instance)
    / sum(node_memory_MemTotal_bytes) by (instance)
  legendFormat: '[[instance]]'
  description: cluster memory usage percent
---
# Summary series: 5m rate of CFS-throttled CPU seconds per container_name.
# $namespace / $podName / $containerName are placeholders in the expression
# (presumably substituted by the consumer - confirm).
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: container-cpu-cfs-throttled-seconds-sum-rate
  labels:
    app: metric-expression
    component: container
    details: "false"
    graph: container-cpu-usage-details
    level: project
    source: rancher-monitoring
spec:
  expression: >-
    sum(rate(container_cpu_cfs_throttled_seconds_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name=~"$containerName"}[5m]))
    by (container_name)
  legendFormat: CPU cfs throttled
  description: container cpu cfs throttled seconds sum rate
---
# Detail series: 5m rate of CFS-throttled CPU seconds per container_name;
# the legend carries the container name.
# $namespace / $podName / $containerName are placeholders in the expression
# (presumably substituted by the consumer - confirm).
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: container-cpu-cfs-throttled-seconds-sum-rate-details
  labels:
    app: metric-expression
    component: container
    details: "true"
    graph: container-cpu-usage-details
    level: project
    source: rancher-monitoring
spec:
  expression: >-
    sum(rate(container_cpu_cfs_throttled_seconds_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name=~"$containerName"}[5m]))
    by (container_name)
  legendFormat: CPU cfs throttled([[container_name]])
  description: container cpu cfs throttled seconds sum rate
---
# Summary series: 5m rate of container CPU usage seconds per container_name.
# $namespace / $podName / $containerName are placeholders in the expression
# (presumably substituted by the consumer - confirm).
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: container-cpu-usage-seconds-sum-rate
  labels:
    app: metric-expression
    component: container
    details: "false"
    graph: container-cpu-usage
    level: project
    source: rancher-monitoring
spec:
  expression: >-
    sum(rate(container_cpu_usage_seconds_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name=~"$containerName"}[5m]))
    by (container_name)
  legendFormat: CPU usage
  description: container cpu usage seconds sum rate
---
# Detail series: 5m rate of container CPU usage seconds per container_name;
# the legend carries the container name.
# $namespace / $podName / $containerName are placeholders in the expression
# (presumably substituted by the consumer - confirm).
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: container-cpu-usage-seconds-sum-rate-details
  labels:
    app: metric-expression
    component: container
    details: "true"
    graph: container-cpu-usage
    level: project
    source: rancher-monitoring
spec:
  expression: >-
    sum(rate(container_cpu_usage_seconds_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name=~"$containerName"}[5m]))
    by (container_name)
  legendFormat: CPU usage([[container_name]])
  description: container cpu usage seconds sum rate
---
# Summary series: 5m rate of container system CPU seconds per container_name.
# $namespace / $podName / $containerName are placeholders in the expression
# (presumably substituted by the consumer - confirm).
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: container-cpu-system-seconds-sum-rate
  labels:
    app: metric-expression
    component: container
    details: "false"
    graph: container-cpu-usage-details
    level: project
    source: rancher-monitoring
spec:
  expression: >-
    sum(rate(container_cpu_system_seconds_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name=~"$containerName"}[5m]))
    by (container_name)
  legendFormat: CPU system seconds
  description: container cpu system seconds sum rate
---
# Detail series: 5m rate of container system CPU seconds per container_name;
# the legend carries the container name.
# $namespace / $podName / $containerName are placeholders in the expression
# (presumably substituted by the consumer - confirm).
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: container-cpu-system-seconds-sum-rate-details
  labels:
    app: metric-expression
    component: container
    details: "true"
    graph: container-cpu-usage-details
    level: project
    source: rancher-monitoring
spec:
  expression: >-
    sum(rate(container_cpu_system_seconds_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name=~"$containerName"}[5m]))
    by (container_name)
  legendFormat: CPU system seconds([[container_name]])
  description: container cpu system seconds sum rate
---
# Summary series: 5m rate of container user CPU seconds per container_name.
# $namespace / $podName / $containerName are placeholders in the expression
# (presumably substituted by the consumer - confirm).
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: container-cpu-user-seconds-sum-rate
  labels:
    app: metric-expression
    component: container
    details: "false"
    graph: container-cpu-usage-details
    level: project
    source: rancher-monitoring
spec:
  expression: >-
    sum(rate(container_cpu_user_seconds_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name=~"$containerName"}[5m]))
    by (container_name)
  legendFormat: CPU user seconds
  description: container cpu user seconds sum rate
---
# Detail series: 5m rate of container user CPU seconds per container_name;
# the legend carries the container name.
# $namespace / $podName / $containerName are placeholders in the expression
# (presumably substituted by the consumer - confirm).
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: container-cpu-user-seconds-sum-rate-details
  labels:
    app: metric-expression
    component: container
    details: "true"
    graph: container-cpu-usage-details
    level: project
    source: rancher-monitoring
spec:
  expression: >-
    sum(rate(container_cpu_user_seconds_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name=~"$containerName"}[5m]))
    by (container_name)
  legendFormat: CPU user seconds([[container_name]])
  description: container cpu user seconds sum rate
---
# Summary series: container working-set bytes divided by the container
# memory limit, per container_name. label_join copies the `container` label
# of the limits metric into `container_name` so both sides group alike.
# $namespace / $podName / $containerName are placeholders in the expression
# (presumably substituted by the consumer - confirm).
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: container-memory-usage-percent
  labels:
    app: metric-expression
    component: container
    details: "false"
    level: project
    metric: memory-usage-percent
    source: rancher-monitoring
spec:
  expression: >-
    sum(container_memory_working_set_bytes{namespace=~"$namespace", pod_name=~"$podName",
    container_name=~"$containerName"}) by (container_name)
    / sum(label_join(kube_pod_container_resource_limits_memory_bytes{namespace=~"$namespace",
    pod=~"$podName", container=~"$containerName"},"container_name", "", "container"))
    by (container_name)
  legendFormat: Memory
  description: container memory usage percent
---
# Detail series: container working-set bytes divided by the container
# memory limit, per container_name; the legend carries the container name.
# label_join copies the `container` label of the limits metric into
# `container_name` so both sides group alike.
# $namespace / $podName / $containerName are placeholders in the expression
# (presumably substituted by the consumer - confirm).
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: container-memory-usage-percent-details
  labels:
    app: metric-expression
    component: container
    details: "true"
    level: project
    metric: memory-usage-percent
    source: rancher-monitoring
spec:
  expression: >-
    sum(container_memory_working_set_bytes{namespace=~"$namespace", pod_name=~"$podName",
    container_name=~"$containerName"}) by (container_name)
    / sum(label_join(kube_pod_container_resource_limits_memory_bytes{namespace=~"$namespace",
    pod=~"$podName", container=~"$containerName"},"container_name", "", "container"))
    by (container_name)
  legendFormat: Memory([[container_name]])
  description: container memory usage percent
---
# Summary series: container working-set bytes per container_name, excluding
# series whose `name` label matches "POD".
# $namespace / $podName / $containerName are placeholders in the expression
# (presumably substituted by the consumer - confirm).
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: container-memory-usage-bytes-sum
  labels:
    app: metric-expression
    component: container
    details: "false"
    level: project
    metric: memory-usage-bytes-sum
    source: rancher-monitoring
spec:
  expression: >-
    sum(container_memory_working_set_bytes{name!~"POD", namespace=~"$namespace",pod_name=~"$podName",
    container_name=~"$containerName"})
    by (container_name)
  legendFormat: Memory usage
  description: container memory usage bytes sum
---
# Detail series: container working-set bytes per container_name, excluding
# series whose `name` label matches "POD"; the legend carries the container
# name.
# $namespace / $podName / $containerName are placeholders in the expression
# (presumably substituted by the consumer - confirm).
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: container-memory-usage-bytes-sum-details
  labels:
    app: metric-expression
    component: container
    details: "true"
    level: project
    metric: memory-usage-bytes-sum
    source: rancher-monitoring
spec:
  expression: >-
    sum(container_memory_working_set_bytes{name!~"POD", namespace=~"$namespace",pod_name=~"$podName",
    container_name=~"$containerName"})
    by (container_name)
  legendFormat: Memory usage([[container_name]])
  description: container memory usage bytes sum
---
# Summary series: container_fs_usage_bytes summed per container_name.
# $namespace / $podName / $containerName are placeholders in the expression
# (presumably substituted by the consumer - confirm).
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: container-fs-bytes-sum
  labels:
    app: metric-expression
    component: container
    details: "false"
    level: project
    metric: fs-bytes-sum
    source: rancher-monitoring
spec:
  expression: >-
    sum(container_fs_usage_bytes{namespace=~"$namespace", pod_name=~"$podName",
    container_name=~"$containerName"})
    by (container_name)
  legendFormat: Filesystem usage
  description: container fs bytes sum
---
# Detail series: container_fs_usage_bytes summed per container_name; the
# legend carries the container name.
# $namespace / $podName / $containerName are placeholders in the expression
# (presumably substituted by the consumer - confirm).
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: container-fs-bytes-sum-details
  labels:
    app: metric-expression
    component: container
    details: "true"
    level: project
    metric: fs-bytes-sum
    source: rancher-monitoring
spec:
  expression: >-
    sum(container_fs_usage_bytes{namespace=~"$namespace", pod_name=~"$podName",
    container_name=~"$containerName"})
    by (container_name)
  legendFormat: Filesystem usage([[container_name]])
  description: container fs bytes sum
---
# Summary series: 5m rate of container filesystem bytes written per
# container_name, scaled by 8 / 1024.
# $namespace / $podName / $containerName are placeholders in the expression
# (presumably substituted by the consumer - confirm).
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: container-disk-io-writes-bytes-sum-rate
  labels:
    app: metric-expression
    component: container
    details: "false"
    graph: disk-io
    level: project
    metric: disk-io-writes-bytes-sum-rate
    source: rancher-monitoring
spec:
  expression: >-
    sum(rate(container_fs_writes_bytes_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name=~"$containerName"}[5m]))
    by (container_name) * 8 / 1024
  legendFormat: Write
  description: container disk io writes bytes sum rate
---
# Detail series: 5m rate of container filesystem bytes written per
# container_name, scaled by 8 / 1024; the legend carries the container name.
# $namespace / $podName / $containerName are placeholders in the expression
# (presumably substituted by the consumer - confirm).
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: container-disk-io-writes-bytes-sum-rate-details
  labels:
    app: metric-expression
    component: container
    details: "true"
    graph: disk-io
    level: project
    metric: disk-io-writes-bytes-sum-rate
    source: rancher-monitoring
spec:
  expression: >-
    sum(rate(container_fs_writes_bytes_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name=~"$containerName"}[5m]))
    by (container_name) * 8 / 1024
  legendFormat: Write([[container_name]])
  description: container disk io writes bytes sum rate
---
# Summary series: 5m rate of container filesystem bytes read per
# container_name, scaled by 8 / 1024.
# $namespace / $podName / $containerName are placeholders in the expression
# (presumably substituted by the consumer - confirm).
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: container-disk-io-reads-bytes-sum-rate
  labels:
    app: metric-expression
    component: container
    details: "false"
    graph: disk-io
    level: project
    metric: disk-io-reads-bytes-sum-rate
    source: rancher-monitoring
spec:
  expression: >-
    sum(rate(container_fs_reads_bytes_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name=~"$containerName"}[5m]))
    by (container_name) * 8 / 1024
  legendFormat: Read
  description: container disk io reads bytes sum rate
---
# Detail series: 5m rate of container filesystem bytes read per
# container_name, scaled by 8 / 1024; the legend carries the container name.
# $namespace / $podName / $containerName are placeholders in the expression
# (presumably substituted by the consumer - confirm).
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: container-disk-io-reads-bytes-sum-rate-details
  labels:
    app: metric-expression
    component: container
    details: "true"
    graph: disk-io
    level: project
    metric: disk-io-reads-bytes-sum-rate
    source: rancher-monitoring
spec:
  expression: >-
    sum(rate(container_fs_reads_bytes_total{namespace=~"$namespace",pod_name=~"$podName",
    container_name=~"$containerName"}[5m]))
    by (container_name) * 8 / 1024
  legendFormat: Read([[container_name]])
  description: container disk io reads bytes sum rate
---
# Summary series: volumes_depth summed over all series.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: controllermanager-volumes-depth
  labels:
    app: metric-expression
    component: controllermanager
    details: "false"
    level: cluster
    metric: volumes-depth
    source: rancher-monitoring
spec:
  expression: >-
    sum(volumes_depth)
  legendFormat: Volumes depth
  description: controllermanager volumes depth
---
# Detail series: volumes_depth summed per instance.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: controllermanager-volumes-depth-details
  labels:
    app: metric-expression
    component: controllermanager
    details: "true"
    level: cluster
    metric: volumes-depth
    source: rancher-monitoring
spec:
  expression: >-
    sum(volumes_depth) by (instance)
  legendFormat: Volumes depth([[instance]])
  description: controllermanager volumes depth
---
# Summary series: deployment_depth summed over all series.
# The description now says "depth" to match the metric name, expression and
# legend (it previously read "deployment adds").
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: controllermanager-deployment-depth
  labels:
    app: metric-expression
    component: controllermanager
    details: "false"
    level: cluster
    metric: deployment-depth
    source: rancher-monitoring
spec:
  expression: >-
    sum(deployment_depth)
  legendFormat: Deployment depth
  description: controllermanager deployment depth
---
# Detail series: deployment_depth summed per instance.
# The description now says "depth" to match the metric name, expression and
# legend (it previously read "deployment adds").
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: controllermanager-deployment-depth-details
  labels:
    app: metric-expression
    component: controllermanager
    details: "true"
    level: cluster
    metric: deployment-depth
    source: rancher-monitoring
spec:
  expression: >-
    sum(deployment_depth) by (instance)
  legendFormat: Deployment depth([[instance]])
  description: controllermanager deployment depth
---
# Aggregate series (details: "false"): replicaset_depth summed over all series.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: controllermanager-replicaset-depth
  labels:
    app: metric-expression
    component: controllermanager
    details: "false"
    level: cluster
    metric: replicaset-depth
    source: rancher-monitoring
spec:
  description: controllermanager replicaset depth
  legendFormat: Replicaset depth
  expression: sum(replicaset_depth)
---
# Breakdown series (details: "true"): the same sum, one series per instance.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: controllermanager-replicaset-depth-details
  labels:
    app: metric-expression
    component: controllermanager
    details: "true"
    level: cluster
    metric: replicaset-depth
    source: rancher-monitoring
spec:
  description: controllermanager replicaset depth
  legendFormat: Replicaset depth([[instance]])
  expression: sum(replicaset_depth) by (instance)
---
# Aggregate series (details: "false"): service_depth summed over all series.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: controllermanager-service-depth
  labels:
    app: metric-expression
    component: controllermanager
    details: "false"
    level: cluster
    metric: service-depth
    source: rancher-monitoring
spec:
  description: controllermanager service depth
  legendFormat: Service depth
  expression: sum(service_depth)
---
# Breakdown series (details: "true"): the same sum, one series per instance.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: controllermanager-service-depth-details
  labels:
    app: metric-expression
    component: controllermanager
    details: "true"
    level: cluster
    metric: service-depth
    source: rancher-monitoring
spec:
  description: controllermanager service depth
  legendFormat: Service depth([[instance]])
  expression: sum(service_depth) by (instance)
---
# Aggregate series (details: "false"): serviceaccount_depth summed over all
# series.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: controllermanager-serviceaccount-depth
  labels:
    app: metric-expression
    component: controllermanager
    details: "false"
    level: cluster
    metric: serviceaccount-depth
    source: rancher-monitoring
spec:
  description: controllermanager serviceaccount depth
  legendFormat: Serviceaccount depth
  expression: sum(serviceaccount_depth)
---
# Breakdown series (details: "true"): the same sum, one series per instance.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: controllermanager-serviceaccount-depth-details
  labels:
    app: metric-expression
    component: controllermanager
    details: "true"
    level: cluster
    metric: serviceaccount-depth
    source: rancher-monitoring
spec:
  description: controllermanager serviceaccount depth
  legendFormat: Serviceaccount depth([[instance]])
  expression: sum(serviceaccount_depth) by (instance)
---
# Aggregate series (details: "false"): endpoint_depth summed over all series.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: controllermanager-endpoint-depth
  labels:
    app: metric-expression
    component: controllermanager
    details: "false"
    level: cluster
    metric: endpoint-depth
    source: rancher-monitoring
spec:
  description: controllermanager endpoint depth
  legendFormat: Endpoint depth
  expression: sum(endpoint_depth)
---
# Breakdown series (details: "true"): the same sum, one series per instance.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: controllermanager-endpoint-depth-details
  labels:
    app: metric-expression
    component: controllermanager
    details: "true"
    level: cluster
    metric: endpoint-depth
    source: rancher-monitoring
spec:
  description: controllermanager endpoint depth
  legendFormat: Endpoint depth([[instance]])
  expression: sum(endpoint_depth) by (instance)
---
# Aggregate series (details: "false"): daemonset_depth summed over all series.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: controllermanager-daemonset-depth
  labels:
    app: metric-expression
    component: controllermanager
    details: "false"
    level: cluster
    metric: daemonset-depth
    source: rancher-monitoring
spec:
  description: controllermanager daemonset depth
  legendFormat: Daemonset depth
  expression: sum(daemonset_depth)
---
# Breakdown series (details: "true"): the same sum, one series per instance.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: controllermanager-daemonset-depth-details
  labels:
    app: metric-expression
    component: controllermanager
    details: "true"
    level: cluster
    metric: daemonset-depth
    source: rancher-monitoring
spec:
  description: controllermanager daemonset depth
  legendFormat: Daemonset depth([[instance]])
  expression: sum(daemonset_depth) by (instance)
---
# Aggregate series (details: "false"): deployment_depth summed over all series.
# NOTE(review): resources with these same two names are also defined earlier
# in this file; one of the two copies should be dropped.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: controllermanager-deployment-depth
  labels:
    app: metric-expression
    component: controllermanager
    details: "false"
    level: cluster
    metric: deployment-depth
    source: rancher-monitoring
spec:
  description: controllermanager deployment depth
  legendFormat: Deployment depth
  expression: sum(deployment_depth)
---
# Breakdown series (details: "true"): the same sum, one series per instance.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: controllermanager-deployment-depth-details
  labels:
    app: metric-expression
    component: controllermanager
    details: "true"
    level: cluster
    metric: deployment-depth
    source: rancher-monitoring
spec:
  description: controllermanager deployment depth
  legendFormat: Deployment depth([[instance]])
  expression: sum(deployment_depth) by (instance)
---
# Aggregate series (details: "false"): statefulset_depth summed over all
# series. The description now says "depth" (it previously read "adds", which
# did not match the metric being graphed).
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
  name: controllermanager-statefulset-depth
  labels:
    app: metric-expression
    component: controllermanager
    details: "false"
    level: cluster
    metric: statefulset-depth
    source: rancher-monitoring
spec:
  expression: sum(statefulset_depth)
  legendFormat: Statefulset depth
  description: controllermanager statefulset depth
---
# Breakdown series (details: "true"): the same sum, one series per instance.
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
  name: controllermanager-statefulset-depth-details
  labels:
    app: metric-expression
    component: controllermanager
    details: "true"
    level: cluster
    metric: statefulset-depth
    source: rancher-monitoring
spec:
  expression: sum(statefulset_depth) by (instance)
  legendFormat: Statefulset depth([[instance]])
  description: controllermanager statefulset depth
---
# Aggregate series (details: "false"): 5m rate of
# fluentd_input_status_num_records_total, summed over all series.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: input-record-number
  labels:
    app: metric-expression
    component: fluentd
    details: "false"
    level: cluster
    metric: input-record
    source: rancher-monitoring
spec:
  description: Fluentd input status num records total
  legendFormat: Input record number
  expression: sum(rate(fluentd_input_status_num_records_total[5m]))
---
# Breakdown series (details: "true"): the same rate, one series per instance.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: input-record-number-details
  labels:
    app: metric-expression
    component: fluentd
    details: "true"
    level: cluster
    metric: input-record
    source: rancher-monitoring
spec:
  description: Fluentd input status num records total
  legendFormat: Input record number([[instance]])
  expression: sum(rate(fluentd_input_status_num_records_total[5m])) by (instance)
---
# Aggregate series (details: "false"): 5m rate of
# fluentd_output_status_num_records_total, summed over all series.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: output-record-number
  labels:
    app: metric-expression
    component: fluentd
    details: "false"
    level: cluster
    metric: output-record
    source: rancher-monitoring
spec:
  description: Fluentd output status num records total
  legendFormat: Output record number
  expression: sum(rate(fluentd_output_status_num_records_total[5m]))
---
# Breakdown series (details: "true"): the same rate, one series per instance.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: output-record-number-details
  labels:
    app: metric-expression
    component: fluentd
    details: "true"
    level: cluster
    metric: output-record
    source: rancher-monitoring
spec:
  description: Fluentd output status num records total
  legendFormat: Output record number([[instance]])
  expression: sum(rate(fluentd_output_status_num_records_total[5m])) by (instance)
---
# Aggregate series (details: "false"): 5m rate of
# fluentd_output_status_num_errors, summed over all series.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: output-errors
  labels:
    app: metric-expression
    component: fluentd
    details: "false"
    level: cluster
    metric: output-errors
    source: rancher-monitoring
spec:
  description: Fluentd output errors number
  legendFormat: Plugin Output errors
  expression: sum(rate(fluentd_output_status_num_errors[5m]))
---
# Breakdown series (details: "true"): the same rate, grouped by the "type"
# label rather than by instance.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: output-errors-details
  labels:
    app: metric-expression
    component: fluentd
    details: "true"
    level: cluster
    metric: output-errors
    source: rancher-monitoring
spec:
  description: Fluentd output errors number
  legendFormat: Plugin([[type]])
  expression: sum(rate(fluentd_output_status_num_errors[5m])) by (type)
---
# Aggregate series (details: "false"): current Fluentd buffer queue length,
# summed over all series.
# The value is graphed directly instead of through rate(): a queue length goes
# up and down, and rate() treats every decrease as a counter reset, so the
# previous expression did not show the length this metric is described as.
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
  name: buffer-queue-length
  labels:
    app: metric-expression
    component: fluentd
    details: "false"
    level: cluster
    metric: buffer-queue-length
    source: rancher-monitoring
spec:
  expression: sum(fluentd_output_status_buffer_queue_length)
  legendFormat: Buffer queue
  description: Fluentd Buffer queue length
---
# Breakdown series (details: "true"): the same sum, one series per instance.
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
  name: buffer-queue-length-details
  labels:
    app: metric-expression
    component: fluentd
    details: "true"
    level: cluster
    metric: buffer-queue-length
    source: rancher-monitoring
spec:
  expression: sum(fluentd_output_status_buffer_queue_length) by (instance)
  # Quoted because a plain scalar may not start with "[".
  legendFormat: '[[instance]]'
  description: Fluentd Buffer queue length
---
# Aggregate series (details: "false") for the "nginx-connection" graph:
# nginx_connections in state "reading", summed over all series.
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
  name: ingresscontroller-nginx-connection-reading
  labels:
    app: metric-expression
    component: ingresscontroller
    details: "false"
    graph: nginx-connection
    level: cluster
    source: rancher-monitoring
spec:
  expression: sum(nginx_connections{state="reading"})
  legendFormat: Reading connections
  description: ingresscontroller nginx connection reading
---
# Breakdown series (details: "true"): the same sum, one series per instance.
# The legend carries [[instance]] so the per-instance series can be told
# apart; without it every series had the same label.
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
  name: ingresscontroller-nginx-connection-reading-details
  labels:
    app: metric-expression
    component: ingresscontroller
    details: "true"
    graph: nginx-connection
    level: cluster
    source: rancher-monitoring
spec:
  expression: sum(nginx_connections{state="reading"}) by (instance)
  legendFormat: Reading connections([[instance]])
  description: ingresscontroller nginx connection reading
---
# Aggregate series (details: "false") for the "nginx-connection" graph:
# nginx_connections in state "waiting", summed over all series.
# Legend wording follows the other connection states ("<State> connections").
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
  name: ingresscontroller-nginx-connection-waiting
  labels:
    app: metric-expression
    component: ingresscontroller
    details: "false"
    graph: nginx-connection
    level: cluster
    source: rancher-monitoring
spec:
  expression: sum(nginx_connections{state="waiting"})
  legendFormat: Waiting connections
  description: ingresscontroller nginx connection waiting
---
# Breakdown series (details: "true"): the same sum, one series per instance.
# The legend carries [[instance]] so the per-instance series can be told
# apart; without it every series had the same label.
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
  name: ingresscontroller-nginx-connection-waiting-details
  labels:
    app: metric-expression
    component: ingresscontroller
    details: "true"
    graph: nginx-connection
    level: cluster
    source: rancher-monitoring
spec:
  expression: sum(nginx_connections{state="waiting"}) by (instance)
  legendFormat: Waiting connections([[instance]])
  description: ingresscontroller nginx connection waiting
---
# Aggregate series (details: "false") for the "nginx-connection" graph:
# nginx_connections in state "writing", summed over all series.
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
  name: ingresscontroller-nginx-connection-writing
  labels:
    app: metric-expression
    component: ingresscontroller
    details: "false"
    graph: nginx-connection
    level: cluster
    source: rancher-monitoring
spec:
  expression: sum(nginx_connections{state="writing"})
  legendFormat: Writing connections
  description: ingresscontroller nginx connection writing
---
# Breakdown series (details: "true"): the same sum, one series per instance.
# The legend carries [[instance]] so the per-instance series can be told
# apart; without it every series had the same label.
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
  name: ingresscontroller-nginx-connection-writing-details
  labels:
    app: metric-expression
    component: ingresscontroller
    details: "true"
    graph: nginx-connection
    level: cluster
    source: rancher-monitoring
spec:
  expression: sum(nginx_connections{state="writing"}) by (instance)
  legendFormat: Writing connections([[instance]])
  description: ingresscontroller nginx connection writing
---
# Aggregate series (details: "false") for the "nginx-connection" graph:
# nginx_connections in state "accepted", summed over all series.
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
  name: ingresscontroller-nginx-connection-accepted
  labels:
    app: metric-expression
    component: ingresscontroller
    details: "false"
    graph: nginx-connection
    level: cluster
    source: rancher-monitoring
spec:
  expression: sum(nginx_connections{state="accepted"})
  legendFormat: Accepted connections
  description: ingresscontroller nginx connection accepted
---
# Breakdown series (details: "true"): the same sum, one series per instance.
# The legend carries [[instance]] so the per-instance series can be told
# apart; without it every series had the same label.
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
  name: ingresscontroller-nginx-connection-accepted-details
  labels:
    app: metric-expression
    component: ingresscontroller
    details: "true"
    graph: nginx-connection
    level: cluster
    source: rancher-monitoring
spec:
  expression: sum(nginx_connections{state="accepted"}) by (instance)
  legendFormat: Accepted connections([[instance]])
  description: ingresscontroller nginx connection accepted
---
# Aggregate series (details: "false") for the "nginx-connection" graph:
# nginx_connections in state "active", summed over all series.
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
  name: ingresscontroller-nginx-connection-active
  labels:
    app: metric-expression
    component: ingresscontroller
    details: "false"
    graph: nginx-connection
    level: cluster
    source: rancher-monitoring
spec:
  expression: sum(nginx_connections{state="active"})
  legendFormat: Active connections
  description: ingresscontroller nginx connection active
---
# Breakdown series (details: "true"): the same sum, one series per instance.
# The legend carries [[instance]] so the per-instance series can be told
# apart; without it every series had the same label.
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
  name: ingresscontroller-nginx-connection-active-details
  labels:
    app: metric-expression
    component: ingresscontroller
    details: "true"
    graph: nginx-connection
    level: cluster
    source: rancher-monitoring
spec:
  expression: sum(nginx_connections{state="active"}) by (instance)
  legendFormat: Active connections([[instance]])
  description: ingresscontroller nginx connection active
---
# Aggregate series (details: "false") for the "nginx-connection" graph:
# nginx_connections in state "handled", summed over all series.
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
  name: ingresscontroller-nginx-connection-handled
  labels:
    app: metric-expression
    component: ingresscontroller
    details: "false"
    graph: nginx-connection
    level: cluster
    source: rancher-monitoring
spec:
  expression: sum(nginx_connections{state="handled"})
  legendFormat: Handled connections
  description: ingresscontroller nginx connection handled
---
# Breakdown series (details: "true"): the same sum, one series per instance.
# The legend carries [[instance]] so the per-instance series can be told
# apart; without it every series had the same label.
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
  name: ingresscontroller-nginx-connection-handled-details
  labels:
    app: metric-expression
    component: ingresscontroller
    details: "true"
    graph: nginx-connection
    level: cluster
    source: rancher-monitoring
spec:
  expression: sum(nginx_connections{state="handled"}) by (instance)
  legendFormat: Handled connections([[instance]])
  description: ingresscontroller nginx connection handled
---
# Summary series (details: "false"): maximum of
# upstream_response_time_seconds_sum per (host, path), sorted descending.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: ingresscontroller-upstream-response-seconds-by-host
  labels:
    app: metric-expression
    component: ingresscontroller
    details: "false"
    level: cluster
    metric: upstream-response-seconds
    source: rancher-monitoring
spec:
  description: ingresscontroller nginx upstream response seconds by host
  legendFormat: Upstream response seconds(host:[[host]] path:[[path]])
  expression: sort_desc(max(upstream_response_time_seconds_sum) by (host, path))
---
# Details series (details: "true"): uses the same expression and legend as the
# summary series above.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: ingresscontroller-upstream-response-seconds-by-host-details
  labels:
    app: metric-expression
    component: ingresscontroller
    details: "true"
    level: cluster
    metric: upstream-response-seconds
    source: rancher-monitoring
spec:
  description: ingresscontroller nginx upstream response seconds by host
  legendFormat: Upstream response seconds(host:[[host]] path:[[path]])
  expression: sort_desc(max(upstream_response_time_seconds_sum) by (host, path))
---
# Summary series (details: "false"): maximum of the le="1" bucket of
# request_duration_seconds per (host, path).
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: ingresscontroller-nginx-process-seconds-by-path
  labels:
    app: metric-expression
    component: ingresscontroller
    details: "false"
    level: cluster
    metric: request-process-seconds
    source: rancher-monitoring
spec:
  description: ingresscontroller nginx request duration by path
  legendFormat: Request duration(host:[[host]] path:[[path]])
  expression: max(request_duration_seconds_bucket{le="1"}) by (host, path)
---
# Details series (details: "true"): uses the same expression and legend as the
# summary series above.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: ingresscontroller-nginx-process-seconds-by-path-details
  labels:
    app: metric-expression
    component: ingresscontroller
    details: "true"
    level: cluster
    metric: request-process-seconds
    source: rancher-monitoring
spec:
  description: ingresscontroller nginx request duration by path
  legendFormat: Request duration(host:[[host]] path:[[path]])
  expression: max(request_duration_seconds_bucket{le="1"}) by (host, path)
---
# Aggregate series (details: "false"): 0.99 quantile of
# scheduler_e2e_scheduling_latency_microseconds_bucket, divided by 1e+06.
# Buckets are summed by "le" only, so this yields a single series matching the
# instance-less legend; the per-instance breakdown lives in the -details
# document below, as with the other summary/details pairs in this file.
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
  name: scheduler-e-2-e-scheduling-latency-seconds-quantile
  labels:
    app: metric-expression
    component: scheduler
    details: "false"
    level: cluster
    metric: e-2-e-scheduling-latency-seconds-quantile
    source: rancher-monitoring
spec:
  expression: >-
    histogram_quantile(0.99, sum(scheduler_e2e_scheduling_latency_microseconds_bucket)
    by (le)) / 1e+06
  legendFormat: E2E latency
  description: scheduler e 2 e scheduling latency seconds quantile
---
# Breakdown series (details: "true"): the same quantile, one series per
# instance.
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
  name: scheduler-e-2-e-scheduling-latency-seconds-quantile-details
  labels:
    app: metric-expression
    component: scheduler
    details: "true"
    level: cluster
    metric: e-2-e-scheduling-latency-seconds-quantile
    source: rancher-monitoring
spec:
  expression: >-
    histogram_quantile(0.99, sum(scheduler_e2e_scheduling_latency_microseconds_bucket)
    by (le, instance)) / 1e+06
  legendFormat: E2E latency([[instance]])
  description: scheduler e 2 e scheduling latency seconds quantile
---
# Aggregate series (details: "false"): 5m rate of
# scheduler_total_preemption_attempts, summed over all series.
# The "by (instance)" grouping previously sat on this summary document while
# the -details document (whose legend uses [[instance]]) had none; the two
# expressions were swapped and are now in the right place. The description was
# also copied from another metric and now names this one.
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
  name: scheduler-total-preemption-attempts
  labels:
    app: metric-expression
    component: scheduler
    details: "false"
    level: cluster
    metric: total-preemption-attempts
    source: rancher-monitoring
spec:
  expression: sum(rate(scheduler_total_preemption_attempts[5m]))
  legendFormat: Preemption attempts
  description: scheduler total preemption attempts
---
# Breakdown series (details: "true"): the same rate, one series per instance,
# which is what the [[instance]] placeholder in the legend requires.
kind: MonitorMetric
apiVersion: management.cattle.io/v3
metadata:
  name: scheduler-total-preemption-attempts-details
  labels:
    app: metric-expression
    component: scheduler
    details: "true"
    level: cluster
    metric: total-preemption-attempts
    source: rancher-monitoring
spec:
  expression: sum(rate(scheduler_total_preemption_attempts[5m])) by (instance)
  legendFormat: Preemption attempts([[instance]])
  description: scheduler total preemption attempts
---
# Summary series (details: "false"): sum of kube_pod_status_scheduled with
# condition="false".
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: scheduler-pod-unscheduler
  labels:
    app: metric-expression
    component: scheduler
    details: "false"
    level: cluster
    metric: pod-unscheduler
    source: rancher-monitoring
spec:
  description: pod unscheduler
  legendFormat: Scheduling failed pods
  expression: sum(kube_pod_status_scheduled{condition="false"})
---
# Details series (details: "true"): uses the same expression and legend as the
# summary series above.
apiVersion: management.cattle.io/v3
kind: MonitorMetric
metadata:
  name: scheduler-pod-unscheduler-details
  labels:
    app: metric-expression
    component: scheduler
    details: "true"
    level: cluster
    metric: pod-unscheduler
    source: rancher-monitoring
spec:
  description: pod unscheduler
  legendFormat: Scheduling failed pods
  expression: sum(kube_pod_status_scheduled{condition="false"})
---
# Cluster graph "apiserver-request-latency" (priority 300, y-axis unit ms).
# Both selectors match component=apiserver metrics labelled
# metric=request-latency-milliseconds-avg; they differ only in the details flag.
apiVersion: management.cattle.io/v3
kind: ClusterMonitorGraph
metadata:
  name: apiserver-request-latency
  labels:
    app: metric-expression
    cluster-graph: kube-component
    component: apiserver
    level: cluster
    source: rancher-monitoring
spec:
  title: apiserver-request-latency
  priority: 300
  resourceType: apiserver
  displayResourceType: kube-component
  metricsSelector:
    component: apiserver
    details: "false"
    metric: request-latency-milliseconds-avg
  detailsMetricsSelector:
    component: apiserver
    details: "true"
    metric: request-latency-milliseconds-avg
  yAxis:
    unit: ms
---
# Cluster graph "apiserver-request-count" (priority 301, y-axis unit number).
# Both selectors match component=apiserver metrics labelled
# metric=request-count-sum-rate; they differ only in the details flag.
apiVersion: management.cattle.io/v3
kind: ClusterMonitorGraph
metadata:
  name: apiserver-request-count
  labels:
    app: metric-expression
    cluster-graph: kube-component
    component: apiserver
    level: cluster
    source: rancher-monitoring
spec:
  title: apiserver-request-count
  priority: 301
  resourceType: apiserver
  displayResourceType: kube-component
  metricsSelector:
    component: apiserver
    details: "false"
    metric: request-count-sum-rate
  detailsMetricsSelector:
    component: apiserver
    details: "true"
    metric: request-count-sum-rate
  yAxis:
    unit: number
\ No newline at end of file
---
# Cluster graph "controllermanager-queue-depth" (priority 310, y-axis unit
# number). The selectors carry no "metric" or "graph" key, so they match on
# component=controllermanager and the details flag alone.
apiVersion: management.cattle.io/v3
kind: ClusterMonitorGraph
metadata:
  name: controllermanager-queue-depth
  labels:
    app: metric-expression
    cluster-graph: kube-component
    component: controllermanager
    level: cluster
    source: rancher-monitoring
spec:
  title: controllermanager-queue-depth
  priority: 310
  resourceType: controllermanager
  displayResourceType: kube-component
  detailsMetricsSelector:
    component: controllermanager
    details: "true"
  metricsSelector:
    component: controllermanager
    details: "false"
  yAxis:
    unit: number
---
# Cluster graph "ingresscontroller-nginx-connection" (priority 330, y-axis
# unit number). Selects component=ingresscontroller metrics labelled
# graph=nginx-connection, split by the details flag.
# The leading document separator keeps this resource from running into the
# preceding document, which would otherwise repeat every top-level key.
apiVersion: management.cattle.io/v3
kind: ClusterMonitorGraph
metadata:
  labels:
    app: metric-expression
    source: rancher-monitoring
    level: cluster
    component: ingresscontroller
    cluster-graph: kube-component
  name: ingresscontroller-nginx-connection
spec:
  resourceType: ingresscontroller
  displayResourceType: kube-component
  priority: 330
  title: ingresscontroller-nginx-connection
  metricsSelector:
    details: "false"
    component: ingresscontroller
    graph: nginx-connection
  detailsMetricsSelector:
    details: "true"
    component: ingresscontroller
    graph: nginx-connection
  yAxis:
    unit: number
---
# Cluster graph "ingresscontroller-request-process-time" (priority 331, y-axis
# unit seconds). Both selectors match component=ingresscontroller metrics
# labelled metric=request-process-seconds; they differ only in the details flag.
apiVersion: management.cattle.io/v3
kind: ClusterMonitorGraph
metadata:
  name: ingresscontroller-request-process-time
  labels:
    app: metric-expression
    cluster-graph: kube-component
    component: ingresscontroller
    level: cluster
    source: rancher-monitoring
spec:
  title: ingresscontroller-request-process-time
  priority: 331
  resourceType: ingresscontroller
  displayResourceType: kube-component
  detailsMetricsSelector:
    component: ingresscontroller
    details: "true"
    metric: request-process-seconds
  metricsSelector:
    component: ingresscontroller
    details: "false"
    metric: request-process-seconds
  yAxis:
    unit: seconds
---
# Cluster graph "ingresscontroller-upstream-response-seconds" (priority 332,
# y-axis unit seconds, graphType singlestat). Both selectors match
# component=ingresscontroller metrics labelled metric=upstream-response-seconds;
# they differ only in the details flag.
apiVersion: management.cattle.io/v3
kind: ClusterMonitorGraph
metadata:
  name: ingresscontroller-upstream-response-seconds
  labels:
    app: metric-expression
    cluster-graph: kube-component
    component: ingresscontroller
    level: cluster
    source: rancher-monitoring
spec:
  title: ingresscontroller-upstream-response-seconds
  priority: 332
  graphType: singlestat
  resourceType: ingresscontroller
  displayResourceType: kube-component
  detailsMetricsSelector:
    component: ingresscontroller
    details: "true"
    metric: upstream-response-seconds
  metricsSelector:
    component: ingresscontroller
    details: "false"
    metric: upstream-response-seconds
  yAxis:
    unit: seconds
\ No newline at end of file
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment