mirror of
https://github.com/kubernetes/node-problem-detector.git
synced 2026-02-14 18:09:57 +00:00
Add network interface stats
We do not have to collect these often, so for now set the collection interval to 120s (even though the Stackdriver exporter is still set to export every 60s).
This commit is contained in:
55
config/net-cgroup-system-stats-monitor.json
Normal file
55
config/net-cgroup-system-stats-monitor.json
Normal file
@@ -0,0 +1,55 @@
|
||||
{
|
||||
"net": {
|
||||
"metricsConfigs": {
|
||||
"net/rx_bytes": {
|
||||
"displayName": "net/rx_bytes"
|
||||
},
|
||||
"net/rx_packets": {
|
||||
"displayName": "net/rx_packets"
|
||||
},
|
||||
"net/rx_errors": {
|
||||
"displayName": "net/rx_errors"
|
||||
},
|
||||
"net/rx_dropped": {
|
||||
"displayName": "net/rx_dropped"
|
||||
},
|
||||
"net/rx_fifo": {
|
||||
"displayName": "net/rx_fifo"
|
||||
},
|
||||
"net/rx_frame": {
|
||||
"displayName": "net/rx_frame"
|
||||
},
|
||||
"net/rx_compressed": {
|
||||
"displayName": "net/rx_compressed"
|
||||
},
|
||||
"net/rx_multicast": {
|
||||
"displayName": "net/rx_multicast"
|
||||
},
|
||||
"net/tx_bytes": {
|
||||
"displayName": "net/tx_bytes"
|
||||
},
|
||||
"net/tx_packets": {
|
||||
"displayName": "net/tx_packets"
|
||||
},
|
||||
"net/tx_errors": {
|
||||
"displayName": "net/tx_errors"
|
||||
},
|
||||
"net/tx_dropped": {
|
||||
"displayName": "net/tx_dropped"
|
||||
},
|
||||
"net/tx_fifo": {
|
||||
"displayName": "net/tx_fifo"
|
||||
},
|
||||
"net/tx_collisions": {
|
||||
"displayName": "net/tx_collisions"
|
||||
},
|
||||
"net/tx_carrier": {
|
||||
"displayName": "net/tx_carrier"
|
||||
},
|
||||
"net/tx_compressed": {
|
||||
"displayName": "net/tx_compressed"
|
||||
}
|
||||
}
|
||||
},
|
||||
"invokeInterval": "120s"
|
||||
}
|
||||
@@ -10,7 +10,7 @@ ExecStart=/home/kubernetes/bin/node-problem-detector --v=2 --logtostderr --enabl
|
||||
--exporter.stackdriver=/home/kubernetes/node-problem-detector/config/exporter/stackdriver-exporter.json \
|
||||
--config.system-log-monitor=/home/kubernetes/node-problem-detector/config/kernel-monitor.json,/home/kubernetes/node-problem-detector/config/docker-monitor.json,/home/kubernetes/node-problem-detector/config/systemd-monitor.json \
|
||||
--config.custom-plugin-monitor=/home/kubernetes/node-problem-detector/config/kernel-monitor-counter.json,/home/kubernetes/node-problem-detector/config/systemd-monitor-counter.json \
|
||||
--config.system-stats-monitor=/home/kubernetes/node-problem-detector/config/system-stats-monitor.json
|
||||
--config.system-stats-monitor=/home/kubernetes/node-problem-detector/config/system-stats-monitor.json,/home/kubernetes/node-problem-detector/config/net-cgroup-system-stats-monitor.json
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
|
||||
@@ -73,6 +73,22 @@ var NPDMetricToSDMetric = map[metrics.MetricID]string{
|
||||
metrics.SystemProcsRunning: "kubernetes.io/internal/node/guest/system/procs_running",
|
||||
metrics.SystemProcsBlocked: "kubernetes.io/internal/node/guest/system/procs_blocked",
|
||||
metrics.SystemInterruptsTotal: "kubernetes.io/internal/node/guest/system/interrupts_total",
|
||||
metrics.NetDevRxBytes: "kubernetes.io/internal/node/guest/net/rx_bytes",
|
||||
metrics.NetDevRxPackets: "kubernetes.io/internal/node/guest/net/rx_packets",
|
||||
metrics.NetDevRxErrors: "kubernetes.io/internal/node/guest/net/rx_errors",
|
||||
metrics.NetDevRxDropped: "kubernetes.io/internal/node/guest/net/rx_dropped",
|
||||
metrics.NetDevRxFifo: "kubernetes.io/internal/node/guest/net/rx_fifo",
|
||||
metrics.NetDevRxFrame: "kubernetes.io/internal/node/guest/net/rx_frame",
|
||||
metrics.NetDevRxCompressed: "kubernetes.io/internal/node/guest/net/rx_compressed",
|
||||
metrics.NetDevRxMulticast: "kubernetes.io/internal/node/guest/net/rx_multicast",
|
||||
metrics.NetDevTxBytes: "kubernetes.io/internal/node/guest/net/tx_bytes",
|
||||
metrics.NetDevTxPackets: "kubernetes.io/internal/node/guest/net/tx_packets",
|
||||
metrics.NetDevTxErrors: "kubernetes.io/internal/node/guest/net/tx_errors",
|
||||
metrics.NetDevTxDropped: "kubernetes.io/internal/node/guest/net/tx_dropped",
|
||||
metrics.NetDevTxFifo: "kubernetes.io/internal/node/guest/net/tx_fifo",
|
||||
metrics.NetDevTxCollisions: "kubernetes.io/internal/node/guest/net/tx_collisions",
|
||||
metrics.NetDevTxCarrier: "kubernetes.io/internal/node/guest/net/tx_carrier",
|
||||
metrics.NetDevTxCompressed: "kubernetes.io/internal/node/guest/net/tx_compressed",
|
||||
}
|
||||
|
||||
func getMetricTypeConversionFunction(customMetricPrefix string) func(*view.View) string {
|
||||
|
||||
@@ -77,9 +77,9 @@ Below metrics are collected from `memory` component:
|
||||
* `memory_unevictable_used`: [Unevictable memory][/proc doc] usage, in Bytes.
|
||||
* `memory_dirty_used`: Dirty pages usage, in Bytes. Memory usage state is reported under the `state` metric label (e.g. `dirty`, `writeback`). `dirty` means the memory is waiting to be written back to disk, and `writeback` means the memory is actively being written back to disk.
|
||||
|
||||
### OS features
|
||||
### OS features
|
||||
|
||||
The guest OS features such as KTD kernel, GPU support are collected. Below are the OS
|
||||
The guest OS features such as KTD kernel, GPU support are collected. Below are the OS
|
||||
features collected:
|
||||
|
||||
* `KTD`: Enabled, if KTD feature is enabled on OS
|
||||
@@ -87,8 +87,31 @@ features collected:
|
||||
* `KernelModuleIntegrity`: Enabled, if load pin security is enabled and modules are signed.
|
||||
* `GPUSupport`: Enabled, if OS has GPU drivers installed like nvidia.
|
||||
* `UnknownModules`: Enabled, if the OS has third party kernel modules installed.
|
||||
UnknownModules are derived from the /proc/modules compared with the known-modules.json.
|
||||
UnknownModules are derived from the /proc/modules compared with the known-modules.json.
|
||||
|
||||
And an option:
|
||||
`knownModulesConfigPath`: The path to the file that contains the known modules(default
|
||||
modules) can be set. By default, the path is set to `known-modules.json`
|
||||
`knownModulesConfigPath`: The path to the file that contains the known modules(default
|
||||
modules) can be set. By default, the path is set to `known-modules.json`
|
||||
|
||||
### IP Stats (Net Dev)
|
||||
|
||||
Below metrics are collected from `net` component:
|
||||
|
||||
* `net/rx_bytes`: Cumulative count of bytes received.
|
||||
* `net/rx_packets`: Cumulative count of packets received.
|
||||
* `net/rx_errors`: Cumulative count of receive errors encountered.
|
||||
* `net/rx_dropped`: Cumulative count of packets dropped while receiving.
|
||||
* `net/rx_fifo`: Cumulative count of FIFO buffer errors.
|
||||
* `net/rx_frame`: Cumulative count of packet framing errors.
|
||||
* `net/rx_compressed`: Cumulative count of compressed packets received by the device driver.
|
||||
* `net/rx_multicast`: Cumulative count of multicast frames received by the device driver.
|
||||
* `net/tx_bytes`: Cumulative count of bytes transmitted.
|
||||
* `net/tx_packets`: Cumulative count of packets transmitted.
|
||||
* `net/tx_errors`: Cumulative count of transmit errors encountered.
|
||||
* `net/tx_dropped`: Cumulative count of packets dropped while transmitting.
|
||||
* `net/tx_fifo`: Cumulative count of FIFO buffer errors.
|
||||
* `net/tx_collisions`: Cumulative count of collisions detected on the interface.
|
||||
* `net/tx_carrier`: Cumulative count of carrier losses detected by the device driver.
|
||||
* `net/tx_compressed`: Cumulative count of compressed packets transmitted by the device driver.
|
||||
|
||||
All of the above metrics carry an `interface_name` label identifying the network interface.
|
||||
|
||||
@@ -42,3 +42,6 @@ const osVersionLabel = "os_version"
|
||||
|
||||
// kernelVersionLabel labels the kernel version
|
||||
const kernelVersionLabel = "kernel_version"
|
||||
|
||||
// interfaceNameLabel labels the network interface name
|
||||
const interfaceNameLabel = "interface_name"
|
||||
|
||||
330
pkg/systemstatsmonitor/net_collector.go
Normal file
330
pkg/systemstatsmonitor/net_collector.go
Normal file
@@ -0,0 +1,330 @@
|
||||
/*
|
||||
Copyright 2020 The Kubernetes Authors All rights reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package systemstatsmonitor
|
||||
|
||||
import (
|
||||
"github.com/golang/glog"
|
||||
"github.com/prometheus/procfs"
|
||||
"github.com/shirou/gopsutil/host"
|
||||
ssmtypes "k8s.io/node-problem-detector/pkg/systemstatsmonitor/types"
|
||||
"k8s.io/node-problem-detector/pkg/util"
|
||||
"k8s.io/node-problem-detector/pkg/util/metrics"
|
||||
)
|
||||
|
||||
type netCollector struct {
|
||||
tags map[string]string
|
||||
|
||||
mNetDevRxBytes *metrics.Int64Metric
|
||||
mNetDevRxPackets *metrics.Int64Metric
|
||||
mNetDevRxErrors *metrics.Int64Metric
|
||||
mNetDevRxDropped *metrics.Int64Metric
|
||||
mNetDevRxFifo *metrics.Int64Metric
|
||||
mNetDevRxFrame *metrics.Int64Metric
|
||||
mNetDevRxCompressed *metrics.Int64Metric
|
||||
mNetDevRxMulticast *metrics.Int64Metric
|
||||
mNetDevTxBytes *metrics.Int64Metric
|
||||
mNetDevTxPackets *metrics.Int64Metric
|
||||
mNetDevTxErrors *metrics.Int64Metric
|
||||
mNetDevTxDropped *metrics.Int64Metric
|
||||
mNetDevTxFifo *metrics.Int64Metric
|
||||
mNetDevTxCollisions *metrics.Int64Metric
|
||||
mNetDevTxCarrier *metrics.Int64Metric
|
||||
mNetDevTxCompressed *metrics.Int64Metric
|
||||
|
||||
config *ssmtypes.NetStatsConfig
|
||||
}
|
||||
|
||||
func NewNetCollectorOrDie(netConfig *ssmtypes.NetStatsConfig) *netCollector {
|
||||
nc := netCollector{tags: map[string]string{}, config: netConfig}
|
||||
|
||||
kernelVersion, err := host.KernelVersion()
|
||||
if err != nil {
|
||||
glog.Fatalf("Failed to retrieve kernel version: %v", err)
|
||||
}
|
||||
nc.tags[kernelVersionLabel] = kernelVersion
|
||||
|
||||
osVersion, err := util.GetOSVersion()
|
||||
if err != nil {
|
||||
glog.Fatalf("Failed to retrieve OS version: %v", err)
|
||||
}
|
||||
nc.tags[osVersionLabel] = osVersion
|
||||
|
||||
nc.mNetDevRxBytes, err = metrics.NewInt64Metric(
|
||||
metrics.NetDevRxBytes,
|
||||
netConfig.MetricsConfigs[string(metrics.NetDevRxBytes)].DisplayName,
|
||||
"Cumulative count of bytes received.",
|
||||
"Byte",
|
||||
metrics.Sum,
|
||||
[]string{osVersionLabel, kernelVersionLabel, interfaceNameLabel})
|
||||
if err != nil {
|
||||
glog.Fatalf("Error initializing metric for %q: %v", metrics.NetDevRxBytes, err)
|
||||
}
|
||||
|
||||
nc.mNetDevRxPackets, err = metrics.NewInt64Metric(
|
||||
metrics.NetDevRxPackets,
|
||||
netConfig.MetricsConfigs[string(metrics.NetDevRxPackets)].DisplayName,
|
||||
"Cumulative count of packets received.",
|
||||
"1",
|
||||
metrics.Sum,
|
||||
[]string{osVersionLabel, kernelVersionLabel, interfaceNameLabel})
|
||||
if err != nil {
|
||||
glog.Fatalf("Error initializing metric for %q: %v", metrics.NetDevRxPackets, err)
|
||||
}
|
||||
|
||||
nc.mNetDevRxErrors, err = metrics.NewInt64Metric(
|
||||
metrics.NetDevRxErrors,
|
||||
netConfig.MetricsConfigs[string(metrics.NetDevRxErrors)].DisplayName,
|
||||
"Cumulative count of receive errors encountered.",
|
||||
"1",
|
||||
metrics.Sum,
|
||||
[]string{osVersionLabel, kernelVersionLabel, interfaceNameLabel})
|
||||
if err != nil {
|
||||
glog.Fatalf("Error initializing metric for %q: %v", metrics.NetDevRxErrors, err)
|
||||
}
|
||||
|
||||
nc.mNetDevRxDropped, err = metrics.NewInt64Metric(
|
||||
metrics.NetDevRxDropped,
|
||||
netConfig.MetricsConfigs[string(metrics.NetDevRxDropped)].DisplayName,
|
||||
"Cumulative count of packets dropped while receiving.",
|
||||
"1",
|
||||
metrics.Sum,
|
||||
[]string{osVersionLabel, kernelVersionLabel, interfaceNameLabel})
|
||||
if err != nil {
|
||||
glog.Fatalf("Error initializing metric for %q: %v", metrics.NetDevRxDropped, err)
|
||||
}
|
||||
|
||||
nc.mNetDevRxFifo, err = metrics.NewInt64Metric(
|
||||
metrics.NetDevRxFifo,
|
||||
netConfig.MetricsConfigs[string(metrics.NetDevRxFifo)].DisplayName,
|
||||
"Cumulative count of FIFO buffer errors.",
|
||||
"1",
|
||||
metrics.Sum,
|
||||
[]string{osVersionLabel, kernelVersionLabel, interfaceNameLabel})
|
||||
if err != nil {
|
||||
glog.Fatalf("Error initializing metric for %q: %v", metrics.NetDevRxFifo, err)
|
||||
}
|
||||
|
||||
nc.mNetDevRxFrame, err = metrics.NewInt64Metric(
|
||||
metrics.NetDevRxFrame,
|
||||
netConfig.MetricsConfigs[string(metrics.NetDevRxFrame)].DisplayName,
|
||||
"Cumulative count of packet framing errors.",
|
||||
"1",
|
||||
metrics.Sum,
|
||||
[]string{osVersionLabel, kernelVersionLabel, interfaceNameLabel})
|
||||
if err != nil {
|
||||
glog.Fatalf("Error initializing metric for %q: %v", metrics.NetDevRxFrame, err)
|
||||
}
|
||||
|
||||
nc.mNetDevRxCompressed, err = metrics.NewInt64Metric(
|
||||
metrics.NetDevRxCompressed,
|
||||
netConfig.MetricsConfigs[string(metrics.NetDevRxCompressed)].DisplayName,
|
||||
"Cumulative count of compressed packets received by the device driver.",
|
||||
"1",
|
||||
metrics.Sum,
|
||||
[]string{osVersionLabel, kernelVersionLabel, interfaceNameLabel})
|
||||
if err != nil {
|
||||
glog.Fatalf("Error initializing metric for %q: %v", metrics.NetDevRxCompressed, err)
|
||||
}
|
||||
|
||||
nc.mNetDevRxMulticast, err = metrics.NewInt64Metric(
|
||||
metrics.NetDevRxMulticast,
|
||||
netConfig.MetricsConfigs[string(metrics.NetDevRxMulticast)].DisplayName,
|
||||
"Cumulative count of multicast frames received by the device driver.",
|
||||
"1",
|
||||
metrics.Sum,
|
||||
[]string{osVersionLabel, kernelVersionLabel, interfaceNameLabel})
|
||||
if err != nil {
|
||||
glog.Fatalf("Error initializing metric for %q: %v", metrics.NetDevRxMulticast, err)
|
||||
}
|
||||
|
||||
nc.mNetDevTxBytes, err = metrics.NewInt64Metric(
|
||||
metrics.NetDevTxBytes,
|
||||
netConfig.MetricsConfigs[string(metrics.NetDevTxBytes)].DisplayName,
|
||||
"Cumulative count of bytes transmitted.",
|
||||
"Byte",
|
||||
metrics.Sum,
|
||||
[]string{osVersionLabel, kernelVersionLabel, interfaceNameLabel})
|
||||
if err != nil {
|
||||
glog.Fatalf("Error initializing metric for %q: %v", metrics.NetDevTxBytes, err)
|
||||
}
|
||||
|
||||
nc.mNetDevTxPackets, err = metrics.NewInt64Metric(
|
||||
metrics.NetDevTxPackets,
|
||||
netConfig.MetricsConfigs[string(metrics.NetDevTxPackets)].DisplayName,
|
||||
"Cumulative count of packets transmitted.",
|
||||
"1",
|
||||
metrics.Sum,
|
||||
[]string{osVersionLabel, kernelVersionLabel, interfaceNameLabel})
|
||||
if err != nil {
|
||||
glog.Fatalf("Error initializing metric for %q: %v", metrics.NetDevTxPackets, err)
|
||||
}
|
||||
|
||||
nc.mNetDevTxErrors, err = metrics.NewInt64Metric(
|
||||
metrics.NetDevTxErrors,
|
||||
netConfig.MetricsConfigs[string(metrics.NetDevTxErrors)].DisplayName,
|
||||
"Cumulative count of transmit errors encountered.",
|
||||
"1",
|
||||
metrics.Sum,
|
||||
[]string{osVersionLabel, kernelVersionLabel, interfaceNameLabel})
|
||||
if err != nil {
|
||||
glog.Fatalf("Error initializing metric for %q: %v", metrics.NetDevTxErrors, err)
|
||||
}
|
||||
|
||||
nc.mNetDevTxDropped, err = metrics.NewInt64Metric(
|
||||
metrics.NetDevTxDropped,
|
||||
netConfig.MetricsConfigs[string(metrics.NetDevTxDropped)].DisplayName,
|
||||
"Cumulative count of packets dropped while transmitting.",
|
||||
"1",
|
||||
metrics.Sum,
|
||||
[]string{osVersionLabel, kernelVersionLabel, interfaceNameLabel})
|
||||
if err != nil {
|
||||
glog.Fatalf("Error initializing metric for %q: %v", metrics.NetDevTxDropped, err)
|
||||
}
|
||||
|
||||
nc.mNetDevTxFifo, err = metrics.NewInt64Metric(
|
||||
metrics.NetDevTxFifo,
|
||||
netConfig.MetricsConfigs[string(metrics.NetDevTxFifo)].DisplayName,
|
||||
"Cumulative count of FIFO buffer errors.",
|
||||
"1",
|
||||
metrics.Sum,
|
||||
[]string{osVersionLabel, kernelVersionLabel, interfaceNameLabel})
|
||||
if err != nil {
|
||||
glog.Fatalf("Error initializing metric for %q: %v", metrics.NetDevTxFifo, err)
|
||||
}
|
||||
|
||||
nc.mNetDevTxCollisions, err = metrics.NewInt64Metric(
|
||||
metrics.NetDevTxCollisions,
|
||||
netConfig.MetricsConfigs[string(metrics.NetDevTxCollisions)].DisplayName,
|
||||
"Cumulative count of collisions detected on the interface.",
|
||||
"1",
|
||||
metrics.Sum,
|
||||
[]string{osVersionLabel, kernelVersionLabel, interfaceNameLabel})
|
||||
if err != nil {
|
||||
glog.Fatalf("Error initializing metric for %q: %v", metrics.NetDevTxCollisions, err)
|
||||
}
|
||||
|
||||
nc.mNetDevTxCarrier, err = metrics.NewInt64Metric(
|
||||
metrics.NetDevTxCarrier,
|
||||
netConfig.MetricsConfigs[string(metrics.NetDevTxCarrier)].DisplayName,
|
||||
"Cumulative count of carrier losses detected by the device driver.",
|
||||
"1",
|
||||
metrics.Sum,
|
||||
[]string{osVersionLabel, kernelVersionLabel, interfaceNameLabel})
|
||||
if err != nil {
|
||||
glog.Fatalf("Error initializing metric for %q: %v", metrics.NetDevTxCarrier, err)
|
||||
}
|
||||
|
||||
nc.mNetDevTxCompressed, err = metrics.NewInt64Metric(
|
||||
metrics.NetDevTxCompressed,
|
||||
netConfig.MetricsConfigs[string(metrics.NetDevTxCompressed)].DisplayName,
|
||||
"Cumulative count of compressed packets transmitted by the device driver.",
|
||||
"1",
|
||||
metrics.Sum,
|
||||
[]string{osVersionLabel, kernelVersionLabel, interfaceNameLabel})
|
||||
if err != nil {
|
||||
glog.Fatalf("Error initializing metric for %q: %v", metrics.NetDevTxCompressed, err)
|
||||
}
|
||||
|
||||
return &nc
|
||||
}
|
||||
|
||||
func (nc *netCollector) recordNetDev() {
|
||||
if nc.mNetDevRxBytes == nil {
|
||||
return
|
||||
}
|
||||
if nc.mNetDevRxPackets == nil {
|
||||
return
|
||||
}
|
||||
if nc.mNetDevRxErrors == nil {
|
||||
return
|
||||
}
|
||||
if nc.mNetDevRxDropped == nil {
|
||||
return
|
||||
}
|
||||
if nc.mNetDevRxFifo == nil {
|
||||
return
|
||||
}
|
||||
if nc.mNetDevRxFrame == nil {
|
||||
return
|
||||
}
|
||||
if nc.mNetDevRxCompressed == nil {
|
||||
return
|
||||
}
|
||||
if nc.mNetDevRxMulticast == nil {
|
||||
return
|
||||
}
|
||||
if nc.mNetDevTxBytes == nil {
|
||||
return
|
||||
}
|
||||
if nc.mNetDevTxPackets == nil {
|
||||
return
|
||||
}
|
||||
if nc.mNetDevTxErrors == nil {
|
||||
return
|
||||
}
|
||||
if nc.mNetDevTxDropped == nil {
|
||||
return
|
||||
}
|
||||
if nc.mNetDevTxFifo == nil {
|
||||
return
|
||||
}
|
||||
if nc.mNetDevTxCollisions == nil {
|
||||
return
|
||||
}
|
||||
if nc.mNetDevTxCarrier == nil {
|
||||
return
|
||||
}
|
||||
if nc.mNetDevTxCompressed == nil {
|
||||
return
|
||||
}
|
||||
|
||||
fs, err := procfs.NewFS("/proc")
|
||||
stats, err := fs.NetDev()
|
||||
if err != nil {
|
||||
glog.Errorf("Failed to retrieve net dev stat: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
for iface, ifaceStats := range stats {
|
||||
nc.tags[interfaceNameLabel] = iface
|
||||
|
||||
nc.mNetDevRxBytes.Record(nc.tags, int64(ifaceStats.RxBytes))
|
||||
nc.mNetDevRxPackets.Record(nc.tags, int64(ifaceStats.RxPackets))
|
||||
nc.mNetDevRxErrors.Record(nc.tags, int64(ifaceStats.RxErrors))
|
||||
nc.mNetDevRxDropped.Record(nc.tags, int64(ifaceStats.RxDropped))
|
||||
nc.mNetDevRxFifo.Record(nc.tags, int64(ifaceStats.RxFIFO))
|
||||
nc.mNetDevRxFrame.Record(nc.tags, int64(ifaceStats.RxFrame))
|
||||
nc.mNetDevRxCompressed.Record(nc.tags, int64(ifaceStats.RxCompressed))
|
||||
nc.mNetDevRxMulticast.Record(nc.tags, int64(ifaceStats.RxMulticast))
|
||||
nc.mNetDevTxBytes.Record(nc.tags, int64(ifaceStats.TxBytes))
|
||||
nc.mNetDevTxPackets.Record(nc.tags, int64(ifaceStats.TxPackets))
|
||||
nc.mNetDevTxErrors.Record(nc.tags, int64(ifaceStats.TxErrors))
|
||||
nc.mNetDevTxDropped.Record(nc.tags, int64(ifaceStats.TxDropped))
|
||||
nc.mNetDevTxFifo.Record(nc.tags, int64(ifaceStats.TxFIFO))
|
||||
nc.mNetDevTxCollisions.Record(nc.tags, int64(ifaceStats.TxCollisions))
|
||||
nc.mNetDevTxCarrier.Record(nc.tags, int64(ifaceStats.TxCarrier))
|
||||
nc.mNetDevTxCompressed.Record(nc.tags, int64(ifaceStats.TxCompressed))
|
||||
}
|
||||
}
|
||||
|
||||
func (nc *netCollector) collect() {
|
||||
if nc == nil {
|
||||
return
|
||||
}
|
||||
|
||||
nc.recordNetDev()
|
||||
}
|
||||
@@ -44,6 +44,7 @@ type systemStatsMonitor struct {
|
||||
diskCollector *diskCollector
|
||||
hostCollector *hostCollector
|
||||
memoryCollector *memoryCollector
|
||||
netCollector *netCollector
|
||||
osFeatureCollector *osFeatureCollector
|
||||
tomb *tomb.Tomb
|
||||
}
|
||||
@@ -90,6 +91,9 @@ func NewSystemStatsMonitorOrDie(configPath string) types.Monitor {
|
||||
if len(ssm.config.OsFeatureConfig.MetricsConfigs) > 0 {
|
||||
ssm.osFeatureCollector = NewOsFeatureCollectorOrDie(&ssm.config.OsFeatureConfig)
|
||||
}
|
||||
if len(ssm.config.NetConfig.MetricsConfigs) > 0 {
|
||||
ssm.netCollector = NewNetCollectorOrDie(&ssm.config.NetConfig)
|
||||
}
|
||||
return &ssm
|
||||
}
|
||||
|
||||
@@ -115,6 +119,7 @@ func (ssm *systemStatsMonitor) monitorLoop() {
|
||||
ssm.hostCollector.collect()
|
||||
ssm.memoryCollector.collect()
|
||||
ssm.osFeatureCollector.collect()
|
||||
ssm.netCollector.collect()
|
||||
}
|
||||
|
||||
for {
|
||||
@@ -125,6 +130,7 @@ func (ssm *systemStatsMonitor) monitorLoop() {
|
||||
ssm.hostCollector.collect()
|
||||
ssm.memoryCollector.collect()
|
||||
ssm.osFeatureCollector.collect()
|
||||
ssm.netCollector.collect()
|
||||
case <-ssm.tomb.Stopping():
|
||||
glog.Infof("System stats monitor stopped: %s", ssm.configPath)
|
||||
return
|
||||
|
||||
@@ -56,12 +56,17 @@ type OSFeatureStatsConfig struct {
|
||||
KnownModulesConfigPath string `json:"knownModulesConfigPath"`
|
||||
}
|
||||
|
||||
type NetStatsConfig struct {
|
||||
MetricsConfigs map[string]MetricConfig `json:"metricsConfigs"`
|
||||
}
|
||||
|
||||
type SystemStatsConfig struct {
|
||||
CPUConfig CPUStatsConfig `json:"cpu"`
|
||||
DiskConfig DiskStatsConfig `json:"disk"`
|
||||
HostConfig HostStatsConfig `json:"host"`
|
||||
MemoryConfig MemoryStatsConfig `json:"memory"`
|
||||
OsFeatureConfig OSFeatureStatsConfig `json:"osFeature"`
|
||||
NetConfig NetStatsConfig `json:"net"`
|
||||
InvokeIntervalString string `json:"invokeInterval"`
|
||||
InvokeInterval time.Duration `json:"-"`
|
||||
}
|
||||
|
||||
@@ -46,6 +46,22 @@ const (
|
||||
SystemProcsRunning MetricID = "system/procs_running"
|
||||
SystemProcsBlocked MetricID = "system/procs_blocked"
|
||||
SystemInterruptsTotal MetricID = "system/interrupts_total"
|
||||
NetDevRxBytes MetricID = "net/rx_bytes"
|
||||
NetDevRxPackets MetricID = "net/rx_packets"
|
||||
NetDevRxErrors MetricID = "net/rx_errors"
|
||||
NetDevRxDropped MetricID = "net/rx_dropped"
|
||||
NetDevRxFifo MetricID = "net/rx_fifo"
|
||||
NetDevRxFrame MetricID = "net/rx_frame"
|
||||
NetDevRxCompressed MetricID = "net/rx_compressed"
|
||||
NetDevRxMulticast MetricID = "net/rx_multicast"
|
||||
NetDevTxBytes MetricID = "net/tx_bytes"
|
||||
NetDevTxPackets MetricID = "net/tx_packets"
|
||||
NetDevTxErrors MetricID = "net/tx_errors"
|
||||
NetDevTxDropped MetricID = "net/tx_dropped"
|
||||
NetDevTxFifo MetricID = "net/tx_fifo"
|
||||
NetDevTxCollisions MetricID = "net/tx_collisions"
|
||||
NetDevTxCarrier MetricID = "net/tx_carrier"
|
||||
NetDevTxCompressed MetricID = "net/tx_compressed"
|
||||
)
|
||||
|
||||
var MetricMap MetricMapping
|
||||
|
||||
@@ -47,13 +47,16 @@ function install-npd() {
|
||||
|
||||
readonly workdir=$(mktemp -d)
|
||||
tar -xf "${TARBALL}" --directory "${workdir}"
|
||||
|
||||
|
||||
echo "Preparing NPD binary directory."
|
||||
mkdir -p "${BIN_DIR}"
|
||||
mount --bind "${BIN_DIR}" "${BIN_DIR}"
|
||||
# Below remount is to work around COS's noexec mount on /home.
|
||||
mount -o remount,exec "${BIN_DIR}"
|
||||
|
||||
echo "Stopping NPD"
|
||||
systemctl stop node-problem-detector.service || true
|
||||
|
||||
echo "Installing NPD binary."
|
||||
cp "${workdir}"/bin/node-problem-detector "${BIN_DIR}"
|
||||
|
||||
@@ -75,7 +78,6 @@ function install-npd() {
|
||||
# Start systemd service.
|
||||
echo "Starting NPD systemd service."
|
||||
systemctl daemon-reload
|
||||
systemctl stop node-problem-detector.service || true
|
||||
systemctl start node-problem-detector.service
|
||||
}
|
||||
|
||||
@@ -97,4 +99,4 @@ done
|
||||
shift "$((OPTIND-1))"
|
||||
|
||||
|
||||
main "${@}"
|
||||
main "${@}"
|
||||
|
||||
Reference in New Issue
Block a user