mirror of
https://github.com/replicatedhq/troubleshoot.git
synced 2026-02-14 10:19:54 +00:00
call out to fio for host filesystem performance (#1275)
* stashing changes * split filesystem collector into fio and legacy functions * read fio results into analyzer * remove test script * update go.mod * remove old notes * go mod tidy * fix up go.mod * fix up go.mod * refactor tests for fio * make schemas * remove local scripts * local watch script for building troubleshoot * document watch script * fix var names * handle errors if run as non-root * go mod tidy * use String interface * collector happy path test * invalid filesize * invalid filesize * tests * remove old code * remove old init function * let actions tests run this * clean up tests * go mod tidy * remove duplicated type declaration * remove old file create code
This commit is contained in:
5
go.mod
5
go.mod
@@ -86,6 +86,7 @@ require (
|
||||
github.com/mistifyio/go-zfs/v3 v3.0.0 // indirect
|
||||
github.com/mitchellh/copystructure v1.2.0 // indirect
|
||||
github.com/mitchellh/reflectwalk v1.0.2 // indirect
|
||||
github.com/onsi/ginkgo v1.14.0 // indirect
|
||||
github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c // indirect
|
||||
github.com/rubenv/sql-migrate v1.3.1 // indirect
|
||||
github.com/russross/blackfriday/v2 v2.1.0 // indirect
|
||||
@@ -118,7 +119,7 @@ require (
|
||||
github.com/Microsoft/go-winio v0.6.0 // indirect
|
||||
github.com/Microsoft/hcsshim v0.10.0-rc.7 // indirect
|
||||
github.com/andybalholm/brotli v1.0.1 // indirect
|
||||
github.com/aws/aws-sdk-go v1.44.122 // indirect
|
||||
github.com/aws/aws-sdk-go v1.44.198 // indirect
|
||||
github.com/beorn7/perks v1.0.1 // indirect
|
||||
github.com/bgentry/go-netrc v0.0.0-20140422174119-9fd32a8b3d3d // indirect
|
||||
github.com/c9s/goprocinfo v0.0.0-20170724085704-0010a05ce49f // indirect
|
||||
@@ -221,7 +222,7 @@ require (
|
||||
go.opencensus.io v0.24.0 // indirect
|
||||
go.starlark.net v0.0.0-20230525235612-a134d8f9ddca // indirect
|
||||
golang.org/x/crypto v0.12.0 // indirect
|
||||
golang.org/x/net v0.14.0 // indirect
|
||||
golang.org/x/net v0.14.0
|
||||
golang.org/x/oauth2 v0.8.0 // indirect
|
||||
golang.org/x/sys v0.12.0 // indirect
|
||||
golang.org/x/term v0.11.0 // indirect
|
||||
|
||||
17
go.sum
17
go.sum
@@ -244,8 +244,9 @@ github.com/armon/go-radix v1.0.0/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgI
|
||||
github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio=
|
||||
github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 h1:DklsrG3dyBCFEj5IhUbnKptjxatkF07cF2ak3yi77so=
|
||||
github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2/go.mod h1:WaHUgvxTVq04UNunO+XhnAqY/wQc+bxr74GqbsZ/Jqw=
|
||||
github.com/aws/aws-sdk-go v1.44.122 h1:p6mw01WBaNpbdP2xrisz5tIkcNwzj/HysobNoaAHjgo=
|
||||
github.com/aws/aws-sdk-go v1.44.122/go.mod h1:y4AeaBuwd2Lk+GepC1E9v0qOiTws0MIWAX4oIKwKHZo=
|
||||
github.com/aws/aws-sdk-go v1.44.198 h1:kgnvxQv4/kP5M0nbxBx0Ac0so9ndr9f8Ti0g+NmPQF8=
|
||||
github.com/aws/aws-sdk-go v1.44.198/go.mod h1:aVsgQcEevwlmQ7qHE9I3h+dtQgpqhFB+i8Phjh7fkwI=
|
||||
github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q=
|
||||
github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8=
|
||||
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
|
||||
@@ -609,7 +610,6 @@ github.com/hashicorp/logutils v1.0.0/go.mod h1:QIAnNjmIWmVIIkWDTG1z5v++HQmx9WQRO
|
||||
github.com/hashicorp/mdns v1.0.0/go.mod h1:tL+uN++7HEJ6SQLQ2/p+z2pH24WQKWjBPkE0mNTz8vQ=
|
||||
github.com/hashicorp/memberlist v0.1.3/go.mod h1:ajVTdAv/9Im8oMAAj5G31PhhMCZJV2pPBoIllUwCN7I=
|
||||
github.com/hashicorp/serf v0.8.2/go.mod h1:6hOLApaqBFA1NXqRQAsxw9QxuDEvNxSQRwA/JwenrHc=
|
||||
github.com/hpcloud/tail v1.0.0 h1:nfCOvKYfkgYP8hkirhJocXT2+zOD8yUNjXaWfTlyFKI=
|
||||
github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU=
|
||||
github.com/huandu/xstrings v1.3.1/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE=
|
||||
github.com/huandu/xstrings v1.3.2/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE=
|
||||
@@ -800,13 +800,19 @@ github.com/nsf/termbox-go v0.0.0-20190121233118-02980233997d/go.mod h1:IuKpRQcYE
|
||||
github.com/nwaples/rardecode v1.1.0/go.mod h1:5DzqNKiOdpKKBH87u8VlvAnPZMXcGRhxWkRpHbbfGS0=
|
||||
github.com/nwaples/rardecode v1.1.2 h1:Cj0yZY6T1Zx1R7AhTbyGSALm44/Mmq+BAPc4B/p/d3M=
|
||||
github.com/nwaples/rardecode v1.1.2/go.mod h1:5DzqNKiOdpKKBH87u8VlvAnPZMXcGRhxWkRpHbbfGS0=
|
||||
github.com/nxadm/tail v1.4.4 h1:DQuhQpB1tVlglWS2hLQ5OV6B5r8aGxSrPc5Qo6uTN78=
|
||||
github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A=
|
||||
github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn4U=
|
||||
github.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY=
|
||||
github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
|
||||
github.com/onsi/ginkgo v1.10.1 h1:q/mM8GF/n0shIN8SaAZ0V+jnLPzen6WIVZdiwrRlMlo=
|
||||
github.com/onsi/ginkgo v1.10.1/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
|
||||
github.com/onsi/ginkgo v1.12.1/go.mod h1:zj2OWP4+oCPe1qIXoGWkgMRwljMUYCdkwsT2108oapk=
|
||||
github.com/onsi/ginkgo v1.14.0 h1:2mOpI4JVVPBN+WQRa0WKH2eXR+Ey+uK4n7Zj0aYpIQA=
|
||||
github.com/onsi/ginkgo v1.14.0/go.mod h1:iSB4RoI2tjJc9BBv4NKIKWKya62Rps+oPG/Lv9klQyY=
|
||||
github.com/onsi/ginkgo/v2 v2.11.0 h1:WgqUCUt/lT6yXoQ8Wef0fsNn5cAuMK7+KT9UFRz2tcU=
|
||||
github.com/onsi/gomega v1.7.0/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY=
|
||||
github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY=
|
||||
github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo=
|
||||
github.com/onsi/gomega v1.27.10 h1:naR28SdDFlqrG6kScpT8VWpu1xWY5nJRCF3XaYyBjhI=
|
||||
github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U=
|
||||
github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM=
|
||||
@@ -1138,6 +1144,7 @@ golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e/go.mod h1:qpuaurCH72eLCgpAm/
|
||||
golang.org/x/net v0.0.0-20200501053045-e0ff5e5a1de5/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
|
||||
golang.org/x/net v0.0.0-20200506145744-7e3656a0809f/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
|
||||
golang.org/x/net v0.0.0-20200513185701-a91f0712d120/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
|
||||
golang.org/x/net v0.0.0-20200520004742-59133d7f0dd7/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
|
||||
golang.org/x/net v0.0.0-20200520182314-0ba52f642ac2/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
|
||||
golang.org/x/net v0.0.0-20200625001655-4c5254603344/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
|
||||
golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
|
||||
@@ -1234,12 +1241,14 @@ golang.org/x/sys v0.0.0-20190606203320-7fc4e5ec1444/go.mod h1:h1NjWce9XRLGQEsW7w
|
||||
golang.org/x/sys v0.0.0-20190624142023-c5567b49c5d0/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20190801041406-cbf593c0f2f3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20190904154756-749cb33beabd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20191001151750-bb3f8db39f24/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20191010194322-b09406accb47/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20191115151921-52ab43148777/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20191120155948-bd437916bb0e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20191228213918-04cbcbbfeed8/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20200113162924-86b910548bc1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
@@ -1254,6 +1263,7 @@ golang.org/x/sys v0.0.0-20200331124033-c3d80250170d/go.mod h1:h1NjWce9XRLGQEsW7w
|
||||
golang.org/x/sys v0.0.0-20200501052902-10377860bb8e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20200511232937-7e40ca221e25/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20200515095857-1151b9dac4a9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20200519105757-fe76b779f299/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20200523222454-059865788121/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20200803210538-64077c9b5642/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20200905004654-be1d3432aa8f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
@@ -1648,7 +1658,6 @@ gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntN
|
||||
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
|
||||
gopkg.in/cheggaaa/pb.v1 v1.0.27/go.mod h1:V/YB90LKu/1FcN3WVnfiiE5oMCibMjukxqG/qStrOgw=
|
||||
gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI=
|
||||
gopkg.in/fsnotify.v1 v1.4.7 h1:xOHLXZwVvI9hhs+cLKq5+I5onOuwQLhQwiu63xxlHs4=
|
||||
gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys=
|
||||
gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc=
|
||||
gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw=
|
||||
|
||||
@@ -42,7 +42,18 @@ func (a *AnalyzeHostFilesystemPerformance) Analyze(
|
||||
return nil, errors.Wrapf(err, "failed to get collected file %s", name)
|
||||
}
|
||||
|
||||
fsPerf := collect.FSPerfResults{}
|
||||
fioResult := collect.FioResult{}
|
||||
if err := json.Unmarshal(contents, &fioResult); err != nil {
|
||||
return nil, errors.Wrapf(err, "failed to unmarshal fio results from %s", name)
|
||||
}
|
||||
|
||||
if len(fioResult.Jobs) == 0 {
|
||||
return nil, errors.Errorf("no jobs found in fio results from %s", name)
|
||||
}
|
||||
|
||||
fioWriteLatency := fioResult.Jobs[0].Sync
|
||||
|
||||
fsPerf := fioWriteLatency.FSPerfResults()
|
||||
if err := json.Unmarshal(contents, &fsPerf); err != nil {
|
||||
return nil, errors.Wrapf(err, "failed to unmarshal filesystem performance results from %s", name)
|
||||
}
|
||||
@@ -179,7 +190,7 @@ func compareHostFilesystemPerformanceConditionalToActual(conditional string, fsP
|
||||
return doCompareHostFilesystemPerformance(comparator, fsPerf.P9999, desiredDuration)
|
||||
}
|
||||
|
||||
return false, fmt.Errorf("Unknown filesystem performance keyword %q", keyword)
|
||||
return false, fmt.Errorf("unknown filesystem performance keyword %q", keyword)
|
||||
}
|
||||
|
||||
func doCompareHostFilesystemPerformance(operator string, actual time.Duration, desired time.Duration) (bool, error) {
|
||||
@@ -196,7 +207,7 @@ func doCompareHostFilesystemPerformance(operator string, actual time.Duration, d
|
||||
return actual == desired, nil
|
||||
}
|
||||
|
||||
return false, fmt.Errorf("Unknown filesystem performance operator %q", operator)
|
||||
return false, fmt.Errorf("unknown filesystem performance operator %q", operator)
|
||||
}
|
||||
|
||||
func renderFSPerfOutcome(outcome string, fsPerf collect.FSPerfResults) string {
|
||||
|
||||
@@ -1,12 +1,9 @@
|
||||
package analyzer
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
troubleshootv1beta2 "github.com/replicatedhq/troubleshoot/pkg/apis/troubleshoot/v1beta2"
|
||||
"github.com/replicatedhq/troubleshoot/pkg/collect"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
@@ -14,35 +11,305 @@ import (
|
||||
func TestAnalyzeHostFilesystemPerformance(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
fsPerf *collect.FSPerfResults
|
||||
fioResult string
|
||||
hostAnalyzer *troubleshootv1beta2.FilesystemPerformanceAnalyze
|
||||
result []*AnalyzeResult
|
||||
expectErr bool
|
||||
}{
|
||||
{
|
||||
name: "Cover",
|
||||
fsPerf: &collect.FSPerfResults{
|
||||
Min: 200 * time.Nanosecond,
|
||||
Max: time.Second,
|
||||
Average: 55 * time.Microsecond,
|
||||
P1: 1 * time.Microsecond,
|
||||
P5: 5 * time.Microsecond,
|
||||
P10: 10 * time.Microsecond,
|
||||
P20: 20 * time.Microsecond,
|
||||
P30: 30 * time.Microsecond,
|
||||
P40: 40 * time.Microsecond,
|
||||
P50: 50 * time.Microsecond,
|
||||
P60: 60 * time.Microsecond,
|
||||
P70: 70 * time.Microsecond,
|
||||
P80: 80 * time.Microsecond,
|
||||
P90: 90 * time.Microsecond,
|
||||
P95: 95 * time.Microsecond,
|
||||
P99: 99 * time.Microsecond,
|
||||
P995: 995 * time.Microsecond,
|
||||
P999: 999 * time.Microsecond,
|
||||
P9995: 5 * time.Millisecond,
|
||||
P9999: 9 * time.Millisecond,
|
||||
},
|
||||
fioResult: `{
|
||||
"fio version" : "fio-3.28",
|
||||
"timestamp" : 1691679955,
|
||||
"timestamp_ms" : 1691679955590,
|
||||
"time" : "Thu Aug 10 15:05:55 2023",
|
||||
"global options" : {
|
||||
"rw" : "write",
|
||||
"ioengine" : "sync",
|
||||
"fdatasync" : "1",
|
||||
"directory" : "/var/lib/etcd",
|
||||
"size" : "23068672",
|
||||
"bs" : "1024"
|
||||
},
|
||||
"jobs" : [
|
||||
{
|
||||
"jobname" : "fsperf",
|
||||
"groupid" : 0,
|
||||
"error" : 0,
|
||||
"eta" : 0,
|
||||
"elapsed" : 15,
|
||||
"job options" : {
|
||||
"name" : "fsperf",
|
||||
"runtime" : "120"
|
||||
},
|
||||
"read" : {
|
||||
"io_bytes" : 0,
|
||||
"io_kbytes" : 0,
|
||||
"bw_bytes" : 0,
|
||||
"bw" : 0,
|
||||
"iops" : 0.000000,
|
||||
"runtime" : 0,
|
||||
"total_ios" : 0,
|
||||
"short_ios" : 22527,
|
||||
"drop_ios" : 0,
|
||||
"slat_ns" : {
|
||||
"min" : 0,
|
||||
"max" : 0,
|
||||
"mean" : 0.000000,
|
||||
"stddev" : 0.000000,
|
||||
"N" : 0
|
||||
},
|
||||
"clat_ns" : {
|
||||
"min" : 0,
|
||||
"max" : 0,
|
||||
"mean" : 0.000000,
|
||||
"stddev" : 0.000000,
|
||||
"N" : 0
|
||||
},
|
||||
"lat_ns" : {
|
||||
"min" : 0,
|
||||
"max" : 0,
|
||||
"mean" : 0.000000,
|
||||
"stddev" : 0.000000,
|
||||
"N" : 0
|
||||
},
|
||||
"bw_min" : 0,
|
||||
"bw_max" : 0,
|
||||
"bw_agg" : 0.000000,
|
||||
"bw_mean" : 0.000000,
|
||||
"bw_dev" : 0.000000,
|
||||
"bw_samples" : 0,
|
||||
"iops_min" : 0,
|
||||
"iops_max" : 0,
|
||||
"iops_mean" : 0.000000,
|
||||
"iops_stddev" : 0.000000,
|
||||
"iops_samples" : 0
|
||||
},
|
||||
"write" : {
|
||||
"io_bytes" : 23068672,
|
||||
"io_kbytes" : 22528,
|
||||
"bw_bytes" : 1651182,
|
||||
"bw" : 1612,
|
||||
"iops" : 1612.483001,
|
||||
"runtime" : 13971,
|
||||
"total_ios" : 22528,
|
||||
"short_ios" : 0,
|
||||
"drop_ios" : 0,
|
||||
"slat_ns" : {
|
||||
"min" : 0,
|
||||
"max" : 0,
|
||||
"mean" : 0.000000,
|
||||
"stddev" : 0.000000,
|
||||
"N" : 0
|
||||
},
|
||||
"clat_ns" : {
|
||||
"min" : 200,
|
||||
"max" : 1000000000,
|
||||
"mean" : 55000,
|
||||
"stddev" : 12345.6789,
|
||||
"N" : 32400,
|
||||
"percentile" : {
|
||||
"1.000000" : 1000,
|
||||
"5.000000" : 5000,
|
||||
"10.000000" : 10000,
|
||||
"20.000000" : 20000,
|
||||
"30.000000" : 30000,
|
||||
"40.000000" : 40000,
|
||||
"50.000000" : 50000,
|
||||
"60.000000" : 60000,
|
||||
"70.000000" : 70000,
|
||||
"80.000000" : 80000,
|
||||
"90.000000" : 90000,
|
||||
"95.000000" : 95000,
|
||||
"99.000000" : 99000,
|
||||
"99.500000" : 995000,
|
||||
"99.900000" : 999000,
|
||||
"99.950000" : 5000000,
|
||||
"99.990000" : 9000000
|
||||
}
|
||||
},
|
||||
"lat_ns" : {
|
||||
"min" : 2684,
|
||||
"max" : 8710446,
|
||||
"mean" : 95169.335405,
|
||||
"stddev" : 172145.383902,
|
||||
"N" : 22528
|
||||
},
|
||||
"bw_min" : 1516,
|
||||
"bw_max" : 1706,
|
||||
"bw_agg" : 100.000000,
|
||||
"bw_mean" : 1613.629630,
|
||||
"bw_dev" : 35.708379,
|
||||
"bw_samples" : 27,
|
||||
"iops_min" : 1516,
|
||||
"iops_max" : 1706,
|
||||
"iops_mean" : 1613.629630,
|
||||
"iops_stddev" : 35.708379,
|
||||
"iops_samples" : 27
|
||||
},
|
||||
"trim" : {
|
||||
"io_bytes" : 0,
|
||||
"io_kbytes" : 0,
|
||||
"bw_bytes" : 0,
|
||||
"bw" : 0,
|
||||
"iops" : 0.000000,
|
||||
"runtime" : 0,
|
||||
"total_ios" : 0,
|
||||
"short_ios" : 0,
|
||||
"drop_ios" : 0,
|
||||
"slat_ns" : {
|
||||
"min" : 0,
|
||||
"max" : 0,
|
||||
"mean" : 0.000000,
|
||||
"stddev" : 0.000000,
|
||||
"N" : 0
|
||||
},
|
||||
"clat_ns" : {
|
||||
"min" : 0,
|
||||
"max" : 0,
|
||||
"mean" : 0.000000,
|
||||
"stddev" : 0.000000,
|
||||
"N" : 0
|
||||
},
|
||||
"lat_ns" : {
|
||||
"min" : 0,
|
||||
"max" : 0,
|
||||
"mean" : 0.000000,
|
||||
"stddev" : 0.000000,
|
||||
"N" : 0
|
||||
},
|
||||
"bw_min" : 0,
|
||||
"bw_max" : 0,
|
||||
"bw_agg" : 0.000000,
|
||||
"bw_mean" : 0.000000,
|
||||
"bw_dev" : 0.000000,
|
||||
"bw_samples" : 0,
|
||||
"iops_min" : 0,
|
||||
"iops_max" : 0,
|
||||
"iops_mean" : 0.000000,
|
||||
"iops_stddev" : 0.000000,
|
||||
"iops_samples" : 0
|
||||
},
|
||||
"sync" : {
|
||||
"total_ios" : 0,
|
||||
"lat_ns" : {
|
||||
"min" : 200,
|
||||
"max" : 1000000000,
|
||||
"mean" : 55000,
|
||||
"stddev" : 12345.6789,
|
||||
"N" : 32400,
|
||||
"percentile" : {
|
||||
"1.000000" : 1000,
|
||||
"5.000000" : 5000,
|
||||
"10.000000" : 10000,
|
||||
"20.000000" : 20000,
|
||||
"30.000000" : 30000,
|
||||
"40.000000" : 40000,
|
||||
"50.000000" : 50000,
|
||||
"60.000000" : 60000,
|
||||
"70.000000" : 70000,
|
||||
"80.000000" : 80000,
|
||||
"90.000000" : 90000,
|
||||
"95.000000" : 95000,
|
||||
"99.000000" : 99000,
|
||||
"99.500000" : 995000,
|
||||
"99.900000" : 999000,
|
||||
"99.950000" : 5000000,
|
||||
"99.990000" : 9000000
|
||||
}
|
||||
}
|
||||
},
|
||||
"job_runtime" : 13970,
|
||||
"usr_cpu" : 1.410165,
|
||||
"sys_cpu" : 5.454545,
|
||||
"ctx" : 72137,
|
||||
"majf" : 0,
|
||||
"minf" : 16,
|
||||
"iodepth_level" : {
|
||||
"1" : 199.995561,
|
||||
"2" : 0.000000,
|
||||
"4" : 0.000000,
|
||||
"8" : 0.000000,
|
||||
"16" : 0.000000,
|
||||
"32" : 0.000000,
|
||||
">=64" : 0.000000
|
||||
},
|
||||
"iodepth_submit" : {
|
||||
"0" : 0.000000,
|
||||
"4" : 100.000000,
|
||||
"8" : 0.000000,
|
||||
"16" : 0.000000,
|
||||
"32" : 0.000000,
|
||||
"64" : 0.000000,
|
||||
">=64" : 0.000000
|
||||
},
|
||||
"iodepth_complete" : {
|
||||
"0" : 0.000000,
|
||||
"4" : 100.000000,
|
||||
"8" : 0.000000,
|
||||
"16" : 0.000000,
|
||||
"32" : 0.000000,
|
||||
"64" : 0.000000,
|
||||
">=64" : 0.000000
|
||||
},
|
||||
"latency_ns" : {
|
||||
"2" : 0.000000,
|
||||
"4" : 0.000000,
|
||||
"10" : 0.000000,
|
||||
"20" : 0.000000,
|
||||
"50" : 0.000000,
|
||||
"100" : 0.000000,
|
||||
"250" : 0.000000,
|
||||
"500" : 0.000000,
|
||||
"750" : 0.000000,
|
||||
"1000" : 0.000000
|
||||
},
|
||||
"latency_us" : {
|
||||
"2" : 0.000000,
|
||||
"4" : 27.077415,
|
||||
"10" : 42.032138,
|
||||
"20" : 5.450994,
|
||||
"50" : 0.306286,
|
||||
"100" : 0.026634,
|
||||
"250" : 0.461648,
|
||||
"500" : 23.291016,
|
||||
"750" : 1.269531,
|
||||
"1000" : 0.035511
|
||||
},
|
||||
"latency_ms" : {
|
||||
"2" : 0.026634,
|
||||
"4" : 0.017756,
|
||||
"10" : 0.010000,
|
||||
"20" : 0.000000,
|
||||
"50" : 0.000000,
|
||||
"100" : 0.000000,
|
||||
"250" : 0.000000,
|
||||
"500" : 0.000000,
|
||||
"750" : 0.000000,
|
||||
"1000" : 0.000000,
|
||||
"2000" : 0.000000,
|
||||
">=2000" : 0.000000
|
||||
},
|
||||
"latency_depth" : 1,
|
||||
"latency_target" : 0,
|
||||
"latency_percentile" : 100.000000,
|
||||
"latency_window" : 0
|
||||
}
|
||||
],
|
||||
"disk_util" : [
|
||||
{
|
||||
"name" : "sda",
|
||||
"read_ios" : 5610,
|
||||
"write_ios" : 45550,
|
||||
"read_merges" : 0,
|
||||
"write_merges" : 568,
|
||||
"read_ticks" : 1863,
|
||||
"write_ticks" : 11605,
|
||||
"in_queue" : 14353,
|
||||
"util" : 99.435028
|
||||
}
|
||||
]
|
||||
}`,
|
||||
hostAnalyzer: &troubleshootv1beta2.FilesystemPerformanceAnalyze{
|
||||
CollectorName: "etcd",
|
||||
Outcomes: []*troubleshootv1beta2.Outcome{
|
||||
@@ -298,9 +565,298 @@ func TestAnalyzeHostFilesystemPerformance(t *testing.T) {
|
||||
},
|
||||
{
|
||||
name: "skip warn if pass first",
|
||||
fsPerf: &collect.FSPerfResults{
|
||||
P99: 9 * time.Millisecond,
|
||||
},
|
||||
fioResult: `{
|
||||
"fio version" : "fio-3.28",
|
||||
"timestamp" : 1691679955,
|
||||
"timestamp_ms" : 1691679955590,
|
||||
"time" : "Thu Aug 10 15:05:55 2023",
|
||||
"global options" : {
|
||||
"rw" : "write",
|
||||
"ioengine" : "sync",
|
||||
"fdatasync" : "1",
|
||||
"directory" : "/var/lib/etcd",
|
||||
"size" : "23068672",
|
||||
"bs" : "1024"
|
||||
},
|
||||
"jobs" : [
|
||||
{
|
||||
"jobname" : "fsperf",
|
||||
"groupid" : 0,
|
||||
"error" : 0,
|
||||
"eta" : 0,
|
||||
"elapsed" : 15,
|
||||
"job options" : {
|
||||
"name" : "fsperf",
|
||||
"runtime" : "120"
|
||||
},
|
||||
"read" : {
|
||||
"io_bytes" : 0,
|
||||
"io_kbytes" : 0,
|
||||
"bw_bytes" : 0,
|
||||
"bw" : 0,
|
||||
"iops" : 0.000000,
|
||||
"runtime" : 0,
|
||||
"total_ios" : 0,
|
||||
"short_ios" : 22527,
|
||||
"drop_ios" : 0,
|
||||
"slat_ns" : {
|
||||
"min" : 0,
|
||||
"max" : 0,
|
||||
"mean" : 0.000000,
|
||||
"stddev" : 0.000000,
|
||||
"N" : 0
|
||||
},
|
||||
"clat_ns" : {
|
||||
"min" : 0,
|
||||
"max" : 0,
|
||||
"mean" : 0.000000,
|
||||
"stddev" : 0.000000,
|
||||
"N" : 0
|
||||
},
|
||||
"lat_ns" : {
|
||||
"min" : 0,
|
||||
"max" : 0,
|
||||
"mean" : 0.000000,
|
||||
"stddev" : 0.000000,
|
||||
"N" : 0
|
||||
},
|
||||
"bw_min" : 0,
|
||||
"bw_max" : 0,
|
||||
"bw_agg" : 0.000000,
|
||||
"bw_mean" : 0.000000,
|
||||
"bw_dev" : 0.000000,
|
||||
"bw_samples" : 0,
|
||||
"iops_min" : 0,
|
||||
"iops_max" : 0,
|
||||
"iops_mean" : 0.000000,
|
||||
"iops_stddev" : 0.000000,
|
||||
"iops_samples" : 0
|
||||
},
|
||||
"write" : {
|
||||
"io_bytes" : 23068672,
|
||||
"io_kbytes" : 22528,
|
||||
"bw_bytes" : 1651182,
|
||||
"bw" : 1612,
|
||||
"iops" : 1612.483001,
|
||||
"runtime" : 13971,
|
||||
"total_ios" : 22528,
|
||||
"short_ios" : 0,
|
||||
"drop_ios" : 0,
|
||||
"slat_ns" : {
|
||||
"min" : 0,
|
||||
"max" : 0,
|
||||
"mean" : 0.000000,
|
||||
"stddev" : 0.000000,
|
||||
"N" : 0
|
||||
},
|
||||
"clat_ns" : {
|
||||
"min" : 200,
|
||||
"max" : 1000000000,
|
||||
"mean" : 55000,
|
||||
"stddev" : 12345.6789,
|
||||
"N" : 32400,
|
||||
"percentile" : {
|
||||
"1.000000" : 1000,
|
||||
"5.000000" : 5000,
|
||||
"10.000000" : 10000,
|
||||
"20.000000" : 20000,
|
||||
"30.000000" : 30000,
|
||||
"40.000000" : 40000,
|
||||
"50.000000" : 50000,
|
||||
"60.000000" : 60000,
|
||||
"70.000000" : 70000,
|
||||
"80.000000" : 80000,
|
||||
"90.000000" : 90000,
|
||||
"95.000000" : 95000,
|
||||
"99.000000" : 99000,
|
||||
"99.500000" : 995000,
|
||||
"99.900000" : 999000,
|
||||
"99.950000" : 5000000,
|
||||
"99.990000" : 9000000
|
||||
}
|
||||
},
|
||||
"lat_ns" : {
|
||||
"min" : 2684,
|
||||
"max" : 8710446,
|
||||
"mean" : 95169.335405,
|
||||
"stddev" : 172145.383902,
|
||||
"N" : 22528
|
||||
},
|
||||
"bw_min" : 1516,
|
||||
"bw_max" : 1706,
|
||||
"bw_agg" : 100.000000,
|
||||
"bw_mean" : 1613.629630,
|
||||
"bw_dev" : 35.708379,
|
||||
"bw_samples" : 27,
|
||||
"iops_min" : 1516,
|
||||
"iops_max" : 1706,
|
||||
"iops_mean" : 1613.629630,
|
||||
"iops_stddev" : 35.708379,
|
||||
"iops_samples" : 27
|
||||
},
|
||||
"trim" : {
|
||||
"io_bytes" : 0,
|
||||
"io_kbytes" : 0,
|
||||
"bw_bytes" : 0,
|
||||
"bw" : 0,
|
||||
"iops" : 0.000000,
|
||||
"runtime" : 0,
|
||||
"total_ios" : 0,
|
||||
"short_ios" : 0,
|
||||
"drop_ios" : 0,
|
||||
"slat_ns" : {
|
||||
"min" : 0,
|
||||
"max" : 0,
|
||||
"mean" : 0.000000,
|
||||
"stddev" : 0.000000,
|
||||
"N" : 0
|
||||
},
|
||||
"clat_ns" : {
|
||||
"min" : 0,
|
||||
"max" : 0,
|
||||
"mean" : 0.000000,
|
||||
"stddev" : 0.000000,
|
||||
"N" : 0
|
||||
},
|
||||
"lat_ns" : {
|
||||
"min" : 0,
|
||||
"max" : 0,
|
||||
"mean" : 0.000000,
|
||||
"stddev" : 0.000000,
|
||||
"N" : 0
|
||||
},
|
||||
"bw_min" : 0,
|
||||
"bw_max" : 0,
|
||||
"bw_agg" : 0.000000,
|
||||
"bw_mean" : 0.000000,
|
||||
"bw_dev" : 0.000000,
|
||||
"bw_samples" : 0,
|
||||
"iops_min" : 0,
|
||||
"iops_max" : 0,
|
||||
"iops_mean" : 0.000000,
|
||||
"iops_stddev" : 0.000000,
|
||||
"iops_samples" : 0
|
||||
},
|
||||
"sync" : {
|
||||
"total_ios" : 0,
|
||||
"lat_ns" : {
|
||||
"min" : 200,
|
||||
"max" : 1000000000,
|
||||
"mean" : 55000,
|
||||
"stddev" : 12345.6789,
|
||||
"N" : 32400,
|
||||
"percentile" : {
|
||||
"1.000000" : 1000,
|
||||
"5.000000" : 5000,
|
||||
"10.000000" : 10000,
|
||||
"20.000000" : 20000,
|
||||
"30.000000" : 30000,
|
||||
"40.000000" : 40000,
|
||||
"50.000000" : 50000,
|
||||
"60.000000" : 60000,
|
||||
"70.000000" : 70000,
|
||||
"80.000000" : 80000,
|
||||
"90.000000" : 90000,
|
||||
"95.000000" : 95000,
|
||||
"99.000000" : 9000000,
|
||||
"99.500000" : 995000,
|
||||
"99.900000" : 999000,
|
||||
"99.950000" : 5000000,
|
||||
"99.990000" : 9000000
|
||||
}
|
||||
}
|
||||
},
|
||||
"job_runtime" : 13970,
|
||||
"usr_cpu" : 1.410165,
|
||||
"sys_cpu" : 5.454545,
|
||||
"ctx" : 72137,
|
||||
"majf" : 0,
|
||||
"minf" : 16,
|
||||
"iodepth_level" : {
|
||||
"1" : 199.995561,
|
||||
"2" : 0.000000,
|
||||
"4" : 0.000000,
|
||||
"8" : 0.000000,
|
||||
"16" : 0.000000,
|
||||
"32" : 0.000000,
|
||||
">=64" : 0.000000
|
||||
},
|
||||
"iodepth_submit" : {
|
||||
"0" : 0.000000,
|
||||
"4" : 100.000000,
|
||||
"8" : 0.000000,
|
||||
"16" : 0.000000,
|
||||
"32" : 0.000000,
|
||||
"64" : 0.000000,
|
||||
">=64" : 0.000000
|
||||
},
|
||||
"iodepth_complete" : {
|
||||
"0" : 0.000000,
|
||||
"4" : 100.000000,
|
||||
"8" : 0.000000,
|
||||
"16" : 0.000000,
|
||||
"32" : 0.000000,
|
||||
"64" : 0.000000,
|
||||
">=64" : 0.000000
|
||||
},
|
||||
"latency_ns" : {
|
||||
"2" : 0.000000,
|
||||
"4" : 0.000000,
|
||||
"10" : 0.000000,
|
||||
"20" : 0.000000,
|
||||
"50" : 0.000000,
|
||||
"100" : 0.000000,
|
||||
"250" : 0.000000,
|
||||
"500" : 0.000000,
|
||||
"750" : 0.000000,
|
||||
"1000" : 0.000000
|
||||
},
|
||||
"latency_us" : {
|
||||
"2" : 0.000000,
|
||||
"4" : 27.077415,
|
||||
"10" : 42.032138,
|
||||
"20" : 5.450994,
|
||||
"50" : 0.306286,
|
||||
"100" : 0.026634,
|
||||
"250" : 0.461648,
|
||||
"500" : 23.291016,
|
||||
"750" : 1.269531,
|
||||
"1000" : 0.035511
|
||||
},
|
||||
"latency_ms" : {
|
||||
"2" : 0.026634,
|
||||
"4" : 0.017756,
|
||||
"10" : 0.010000,
|
||||
"20" : 0.000000,
|
||||
"50" : 0.000000,
|
||||
"100" : 0.000000,
|
||||
"250" : 0.000000,
|
||||
"500" : 0.000000,
|
||||
"750" : 0.000000,
|
||||
"1000" : 0.000000,
|
||||
"2000" : 0.000000,
|
||||
">=2000" : 0.000000
|
||||
},
|
||||
"latency_depth" : 1,
|
||||
"latency_target" : 0,
|
||||
"latency_percentile" : 100.000000,
|
||||
"latency_window" : 0
|
||||
}
|
||||
],
|
||||
"disk_util" : [
|
||||
{
|
||||
"name" : "sda",
|
||||
"read_ios" : 5610,
|
||||
"write_ios" : 45550,
|
||||
"read_merges" : 0,
|
||||
"write_merges" : 568,
|
||||
"read_ticks" : 1863,
|
||||
"write_ticks" : 11605,
|
||||
"in_queue" : 14353,
|
||||
"util" : 99.435028
|
||||
}
|
||||
]
|
||||
}`,
|
||||
hostAnalyzer: &troubleshootv1beta2.FilesystemPerformanceAnalyze{
|
||||
CollectorName: "file system performance",
|
||||
Outcomes: []*troubleshootv1beta2.Outcome{
|
||||
@@ -332,20 +888,66 @@ func TestAnalyzeHostFilesystemPerformance(t *testing.T) {
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "bail if malformed JSON",
|
||||
fioResult: `{
|
||||
bad JSON
|
||||
}`,
|
||||
hostAnalyzer: &troubleshootv1beta2.FilesystemPerformanceAnalyze{
|
||||
CollectorName: "file system performance",
|
||||
Outcomes: []*troubleshootv1beta2.Outcome{
|
||||
{
|
||||
Fail: &troubleshootv1beta2.SingleOutcome{
|
||||
Message: "bad JSON should not be analyzed",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
expectErr: true,
|
||||
},
|
||||
{
|
||||
name: "bail if fio ran no jobs",
|
||||
fioResult: `{
|
||||
"fio version" : "fio-3.28",
|
||||
"timestamp" : 1691679955,
|
||||
"timestamp_ms" : 1691679955590,
|
||||
"time" : "Thu Aug 10 15:05:55 2023",
|
||||
"global options" : {
|
||||
"rw" : "write",
|
||||
"ioengine" : "sync",
|
||||
"fdatasync" : "1",
|
||||
"directory" : "/var/lib/etcd",
|
||||
"size" : "23068672",
|
||||
"bs" : "1024"
|
||||
},
|
||||
"jobs" : [
|
||||
]
|
||||
}`,
|
||||
hostAnalyzer: &troubleshootv1beta2.FilesystemPerformanceAnalyze{
|
||||
CollectorName: "file system performance",
|
||||
Outcomes: []*troubleshootv1beta2.Outcome{
|
||||
{
|
||||
Fail: &troubleshootv1beta2.SingleOutcome{
|
||||
Message: "an empty Jobs array should not be analyzed",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
expectErr: true,
|
||||
},
|
||||
}
|
||||
for _, test := range tests {
|
||||
t.Run(test.name, func(t *testing.T) {
|
||||
req := require.New(t)
|
||||
b, err := json.Marshal(test.fsPerf)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
b := []byte(test.fioResult)
|
||||
|
||||
getCollectedFileContents := func(filename string) ([]byte, error) {
|
||||
return b, nil
|
||||
}
|
||||
|
||||
result, err := (&AnalyzeHostFilesystemPerformance{test.hostAnalyzer}).Analyze(getCollectedFileContents, nil)
|
||||
a := AnalyzeHostFilesystemPerformance{test.hostAnalyzer}
|
||||
result, err := a.Analyze(getCollectedFileContents, nil)
|
||||
if test.expectErr {
|
||||
req.Error(err)
|
||||
} else {
|
||||
|
||||
@@ -2,16 +2,35 @@ package collect
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"math"
|
||||
"math/rand"
|
||||
"os/exec"
|
||||
"reflect"
|
||||
"strconv"
|
||||
"strings"
|
||||
"text/template"
|
||||
"time"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
troubleshootv1beta2 "github.com/replicatedhq/troubleshoot/pkg/apis/troubleshoot/v1beta2"
|
||||
"golang.org/x/net/context"
|
||||
"k8s.io/apimachinery/pkg/api/resource"
|
||||
"k8s.io/klog/v2"
|
||||
)
|
||||
|
||||
func init() {
|
||||
rand.Seed(time.Now().UnixNano())
|
||||
type Durations []time.Duration
|
||||
|
||||
func (d Durations) Len() int {
|
||||
return len(d)
|
||||
}
|
||||
|
||||
func (d Durations) Less(i, j int) bool {
|
||||
return d[i] < d[j]
|
||||
}
|
||||
|
||||
func (d Durations) Swap(i, j int) {
|
||||
d[i], d[j] = d[j], d[i]
|
||||
}
|
||||
|
||||
type CollectHostFilesystemPerformance struct {
|
||||
@@ -90,3 +109,324 @@ func (f FSPerfResults) String() string {
|
||||
|
||||
return buf.String()
|
||||
}
|
||||
|
||||
type FioResult struct {
|
||||
FioVersion string `json:"fio version,omitempty"`
|
||||
Timestamp int64 `json:"timestamp,omitempty"`
|
||||
TimestampMS int64 `json:"timestamp_ms,omitempty"`
|
||||
Time string `json:"time,omitempty"`
|
||||
GlobalOptions FioGlobalOptions `json:"global options,omitempty"`
|
||||
Jobs []FioJobs `json:"jobs,omitempty"`
|
||||
DiskUtil []FioDiskUtil `json:"disk_util,omitempty"`
|
||||
}
|
||||
|
||||
func (f FioResult) String() string {
|
||||
var res string
|
||||
res += fmt.Sprintf("FIO version - %s\n", f.FioVersion)
|
||||
res += fmt.Sprintf("Global options - %s\n\n", f.GlobalOptions)
|
||||
for _, job := range f.Jobs {
|
||||
res += fmt.Sprintf("%s\n", job)
|
||||
}
|
||||
res += "Disk stats (read/write):\n"
|
||||
for _, du := range f.DiskUtil {
|
||||
res += fmt.Sprintf("%s\n", du)
|
||||
}
|
||||
|
||||
return res
|
||||
}
|
||||
|
||||
// FioGlobalOptions mirrors the "global options" object of fio's JSON output.
type FioGlobalOptions struct {
	Directory  string `json:"directory,omitempty"`
	RandRepeat string `json:"randrepeat,omitempty"`
	Verify     string `json:"verify,omitempty"`
	IOEngine   string `json:"ioengine,omitempty"`
	Direct     string `json:"direct,omitempty"`
	GtodReduce string `json:"gtod_reduce,omitempty"`
}

// String summarizes the key global options on one line. Note that Directory
// and RandRepeat are intentionally omitted from the summary.
func (g FioGlobalOptions) String() string {
	return "ioengine=" + g.IOEngine +
		" verify=" + g.Verify +
		" direct=" + g.Direct +
		" gtod_reduce=" + g.GtodReduce
}
|
||||
|
||||
// FioJobs mirrors one entry of the "jobs" array in fio's JSON output.
// Field names follow fio's JSON keys, including the space in "job options".
type FioJobs struct {
	JobName    string        `json:"jobname,omitempty"`
	GroupID    int           `json:"groupid,omitempty"`
	Error      int           `json:"error,omitempty"`
	Eta        int           `json:"eta,omitempty"`
	Elapsed    int           `json:"elapsed,omitempty"`
	JobOptions FioJobOptions `json:"job options,omitempty"`
	// Per-operation statistics blocks.
	Read  FioStats `json:"read,omitempty"`
	Write FioStats `json:"write,omitempty"`
	Trim  FioStats `json:"trim,omitempty"`
	Sync  FioStats `json:"sync,omitempty"`
	// CPU / process accounting for the job.
	JobRuntime int32   `json:"job_runtime,omitempty"`
	UsrCpu     float32 `json:"usr_cpu,omitempty"`
	SysCpu     float32 `json:"sys_cpu,omitempty"`
	Ctx        int32   `json:"ctx,omitempty"`
	MajF       int32   `json:"majf,omitempty"`
	MinF       int32   `json:"minf,omitempty"`
	// IO depth distributions.
	IoDepthLevel    FioDepth `json:"iodepth_level,omitempty"`
	IoDepthSubmit   FioDepth `json:"iodepth_submit,omitempty"`
	IoDepthComplete FioDepth `json:"iodepth_complete,omitempty"`
	// Latency bucket distributions at nanosecond/microsecond/millisecond scale.
	LatencyNs         FioLatency `json:"latency_ns,omitempty"`
	LatencyUs         FioLatency `json:"latency_us,omitempty"`
	LatencyMs         FioLatency `json:"latency_ms,omitempty"`
	LatencyDepth      int32      `json:"latency_depth,omitempty"`
	LatencyTarget     int32      `json:"latency_target,omitempty"`
	LatencyPercentile float32    `json:"latency_percentile,omitempty"`
	LatencyWindow     int32      `json:"latency_window,omitempty"`
}
|
||||
|
||||
func (j FioJobs) String() string {
|
||||
var job string
|
||||
job += fmt.Sprintf("%s\n", j.JobOptions)
|
||||
if j.Read.Iops != 0 || j.Read.BW != 0 {
|
||||
job += fmt.Sprintf("read:\n%s\n", j.Read)
|
||||
}
|
||||
if j.Write.Iops != 0 || j.Write.BW != 0 {
|
||||
job += fmt.Sprintf("write:\n%s\n", j.Write)
|
||||
}
|
||||
return job
|
||||
}
|
||||
|
||||
// FioJobOptions mirrors the "job options" object of a fio job. It is also the
// source of truth for the fio command line: buildFioCommand reflects over its
// fields to produce --<lowercased field name>=<value> flags.
type FioJobOptions struct {
	Name      string `json:"name,omitempty"`
	BS        string `json:"bs,omitempty"`
	Directory string `json:"directory,omitempty"`
	RW        string `json:"rw,omitempty"`
	IOEngine  string `json:"ioengine,omitempty"`
	FDataSync string `json:"fdatasync,omitempty"`
	Size      string `json:"size,omitempty"`
	RunTime   string `json:"runtime,omitempty"`
}

// String summarizes the job name and the main IO parameters.
func (o FioJobOptions) String() string {
	return "JobName: " + o.Name + "\n blocksize=" + o.BS + " filesize=" + o.Size + " rw=" + o.RW
}
|
||||
|
||||
// FioStats mirrors one per-operation statistics block (read/write/trim/sync)
// from fio's JSON output.
type FioStats struct {
	// Totals for the run.
	IOBytes  int64   `json:"io_bytes,omitempty"`
	IOKBytes int64   `json:"io_kbytes,omitempty"`
	BWBytes  int64   `json:"bw_bytes,omitempty"`
	BW       int64   `json:"bw,omitempty"`
	Iops     float32 `json:"iops,omitempty"`
	Runtime  int64   `json:"runtime,omitempty"`
	TotalIos int64   `json:"total_ios,omitempty"`
	ShortIos int64   `json:"short_ios,omitempty"`
	DropIos  int64   `json:"drop_ios,omitempty"`
	// Latency breakdowns in nanoseconds: submission, completion, and total.
	SlatNs     FioNS         `json:"slat_ns,omitempty"`
	ClatNs     FioNS         `json:"clat_ns,omitempty"`
	LatNs      FioNS         `json:"lat_ns,omitempty"`
	Percentile FioPercentile `json:"percentile,omitempty"`
	// Bandwidth and IOPS sample statistics.
	BwMin       int64   `json:"bw_min,omitempty"`
	BwMax       int64   `json:"bw_max,omitempty"`
	BwAgg       float32 `json:"bw_agg,omitempty"`
	BwMean      float32 `json:"bw_mean,omitempty"`
	BwDev       float32 `json:"bw_dev,omitempty"`
	BwSamples   int32   `json:"bw_samples,omitempty"`
	IopsMin     int32   `json:"iops_min,omitempty"`
	IopsMax     int32   `json:"iops_max,omitempty"`
	IopsMean    float32 `json:"iops_mean,omitempty"`
	IopsStdDev  float32 `json:"iops_stddev,omitempty"`
	IopsSamples int32   `json:"iops_samples,omitempty"`
}
|
||||
|
||||
func (s FioStats) String() string {
|
||||
var stats string
|
||||
stats += fmt.Sprintf(" IOPS=%f BW(KiB/s)=%d\n", s.Iops, s.BW)
|
||||
stats += fmt.Sprintf(" iops: min=%d max=%d avg=%f\n", s.IopsMin, s.IopsMax, s.IopsMean)
|
||||
stats += fmt.Sprintf(" bw(KiB/s): min=%d max=%d avg=%f", s.BwMin, s.BwMax, s.BwMean)
|
||||
return stats
|
||||
}
|
||||
|
||||
func (s FioStats) FSPerfResults() FSPerfResults {
|
||||
return FSPerfResults{
|
||||
Min: time.Duration(s.LatNs.Min),
|
||||
Max: time.Duration(s.LatNs.Max),
|
||||
Average: time.Duration(s.LatNs.Mean),
|
||||
P1: time.Duration(s.LatNs.Percentile.P1),
|
||||
P5: time.Duration(s.LatNs.Percentile.P5),
|
||||
P10: time.Duration(s.LatNs.Percentile.P10),
|
||||
P20: time.Duration(s.LatNs.Percentile.P20),
|
||||
P30: time.Duration(s.LatNs.Percentile.P30),
|
||||
P40: time.Duration(s.LatNs.Percentile.P40),
|
||||
P50: time.Duration(s.LatNs.Percentile.P50),
|
||||
P60: time.Duration(s.LatNs.Percentile.P60),
|
||||
P70: time.Duration(s.LatNs.Percentile.P70),
|
||||
P80: time.Duration(s.LatNs.Percentile.P80),
|
||||
P90: time.Duration(s.LatNs.Percentile.P90),
|
||||
P95: time.Duration(s.LatNs.Percentile.P95),
|
||||
P99: time.Duration(s.LatNs.Percentile.P99),
|
||||
P995: time.Duration(s.LatNs.Percentile.P995),
|
||||
P999: time.Duration(s.LatNs.Percentile.P999),
|
||||
P9995: time.Duration(s.LatNs.Percentile.P9995),
|
||||
P9999: time.Duration(s.LatNs.Percentile.P9999),
|
||||
}
|
||||
}
|
||||
|
||||
// FioNS mirrors a fio latency-distribution object in nanoseconds
// (slat_ns / clat_ns / lat_ns): min/max/mean/stddev over N samples,
// plus the percentile table when fio reports one.
type FioNS struct {
	Min        int64         `json:"min,omitempty"`
	Max        int64         `json:"max,omitempty"`
	Mean       float32       `json:"mean,omitempty"`
	StdDev     float32       `json:"stddev,omitempty"`
	N          int64         `json:"N,omitempty"`
	Percentile FioPercentile `json:"percentile,omitempty"`
}
|
||||
|
||||
// FioDepth mirrors fio's IO-depth distribution: the percentage of IOs that
// were at each queue-depth bucket. JSON keys are the numeric bucket names.
type FioDepth struct {
	FioDepth0    float32 `json:"0,omitempty"`
	FioDepth1    float32 `json:"1,omitempty"`
	FioDepth2    float32 `json:"2,omitempty"`
	FioDepth4    float32 `json:"4,omitempty"`
	FioDepth8    float32 `json:"8,omitempty"`
	FioDepth16   float32 `json:"16,omitempty"`
	FioDepth32   float32 `json:"32,omitempty"`
	FioDepth64   float32 `json:"64,omitempty"`
	FioDepthGE64 float32 `json:">=64,omitempty"`
}
|
||||
|
||||
// FioLatency mirrors one of fio's latency histograms (latency_ns/us/ms):
// the percentage of IOs that completed within each bucket. JSON keys are
// the numeric bucket boundaries.
type FioLatency struct {
	FioLat2      float32 `json:"2,omitempty"`
	FioLat4      float32 `json:"4,omitempty"`
	FioLat10     float32 `json:"10,omitempty"`
	FioLat20     float32 `json:"20,omitempty"`
	FioLat50     float32 `json:"50,omitempty"`
	FioLat100    float32 `json:"100,omitempty"`
	FioLat250    float32 `json:"250,omitempty"`
	FioLat500    float32 `json:"500,omitempty"`
	FioLat750    float32 `json:"750,omitempty"`
	FioLat1000   float32 `json:"1000,omitempty"`
	FioLat2000   float32 `json:"2000,omitempty"`
	FioLatGE2000 float32 `json:">=2000,omitempty"`
}
|
||||
|
||||
// FioDiskUtil mirrors one entry of fio's "disk_util" array: per-device IO
// counters split into read/write pairs, plus overall utilization percent.
type FioDiskUtil struct {
	// Name is the block device name, e.g. "rbd4".
	Name        string  `json:"name,omitempty"`
	ReadIos     int64   `json:"read_ios,omitempty"`
	WriteIos    int64   `json:"write_ios,omitempty"`
	ReadMerges  int64   `json:"read_merges,omitempty"`
	WriteMerges int64   `json:"write_merges,omitempty"`
	ReadTicks   int64   `json:"read_ticks,omitempty"`
	WriteTicks  int64   `json:"write_ticks,omitempty"`
	InQueue     int64   `json:"in_queue,omitempty"`
	Util        float32 `json:"util,omitempty"`
}
|
||||
|
||||
// FioPercentile mirrors fio's percentile table. JSON keys are the percentile
// values formatted with six decimals ("99.500000" -> P995); values are
// latencies at that percentile, in the unit of the enclosing distribution.
type FioPercentile struct {
	P1    int `json:"1.000000,omitempty"`
	P5    int `json:"5.000000,omitempty"`
	P10   int `json:"10.000000,omitempty"`
	P20   int `json:"20.000000,omitempty"`
	P30   int `json:"30.000000,omitempty"`
	P40   int `json:"40.000000,omitempty"`
	P50   int `json:"50.000000,omitempty"`
	P60   int `json:"60.000000,omitempty"`
	P70   int `json:"70.000000,omitempty"`
	P80   int `json:"80.000000,omitempty"`
	P90   int `json:"90.000000,omitempty"`
	P95   int `json:"95.000000,omitempty"`
	P99   int `json:"99.000000,omitempty"`
	P995  int `json:"99.500000,omitempty"`
	P999  int `json:"99.900000,omitempty"`
	P9995 int `json:"99.950000,omitempty"`
	P9999 int `json:"99.990000,omitempty"`
}
|
||||
|
||||
func (d FioDiskUtil) String() string {
|
||||
//Disk stats (read/write):
|
||||
//rbd4: ios=30022/11982, merge=0/313, ticks=1028675/1022768, in_queue=2063740, util=99.67%
|
||||
var du string
|
||||
du += fmt.Sprintf(" %s: ios=%d/%d merge=%d/%d ticks=%d/%d in_queue=%d, util=%f%%", d.Name, d.ReadIos,
|
||||
d.WriteIos, d.ReadMerges, d.WriteMerges, d.ReadTicks, d.WriteTicks, d.InQueue, d.Util)
|
||||
return du
|
||||
}
|
||||
|
||||
func parseCollectorOptions(hostCollector *troubleshootv1beta2.FilesystemPerformance) ([]string, *FioJobOptions, error) {
|
||||
|
||||
var operationSize uint64 = 1024
|
||||
if hostCollector.OperationSizeBytes > 0 {
|
||||
operationSize = hostCollector.OperationSizeBytes
|
||||
}
|
||||
var fileSize uint64 = 10 * 1024 * 1024
|
||||
if hostCollector.FileSize != "" {
|
||||
quantity, err := resource.ParseQuantity(hostCollector.FileSize)
|
||||
if err != nil {
|
||||
return nil, nil, errors.Wrapf(err, "failed to parse fileSize %q", hostCollector.FileSize)
|
||||
}
|
||||
fileSizeInt64, ok := quantity.AsInt64()
|
||||
if !ok {
|
||||
return nil, nil, errors.Wrapf(err, "failed to parse fileSize %q", hostCollector.FileSize)
|
||||
}
|
||||
if fileSizeInt64 <= 0 {
|
||||
return nil, nil, errors.Wrapf(err, "fileSize %q must be greater than 0", hostCollector.FileSize)
|
||||
}
|
||||
fileSize = uint64(fileSizeInt64)
|
||||
}
|
||||
|
||||
if hostCollector.Directory == "" {
|
||||
return nil, nil, errors.New("Directory is required to collect filesystem performance info")
|
||||
}
|
||||
|
||||
latencyBenchmarkOptions := FioJobOptions{
|
||||
RW: "write",
|
||||
IOEngine: "sync",
|
||||
FDataSync: "1",
|
||||
Directory: hostCollector.Directory,
|
||||
Size: strconv.FormatUint(fileSize, 10),
|
||||
BS: strconv.FormatUint(operationSize, 10),
|
||||
Name: "fsperf",
|
||||
RunTime: "120",
|
||||
}
|
||||
|
||||
command := buildFioCommand(latencyBenchmarkOptions)
|
||||
|
||||
return command, &latencyBenchmarkOptions, nil
|
||||
}
|
||||
|
||||
func buildFioCommand(opts FioJobOptions) []string {
|
||||
command := []string{"fio"}
|
||||
v := reflect.ValueOf(opts)
|
||||
t := reflect.TypeOf(opts)
|
||||
for i := 0; i < v.NumField(); i++ {
|
||||
field := t.Field(i)
|
||||
value := v.Field(i)
|
||||
if !value.IsZero() {
|
||||
command = append(command, fmt.Sprintf("--%s=%v", strings.ToLower(field.Name), value.Interface()))
|
||||
}
|
||||
}
|
||||
command = append(command, "--output-format=json")
|
||||
return command
|
||||
}
|
||||
|
||||
// collectFioResults builds the fio command from the collector spec, runs the
// fio binary, and decodes its JSON output into a FioResult. It returns an
// error when the spec is invalid, fio is not installed, the run fails, or the
// output cannot be parsed.
func collectFioResults(ctx context.Context, hostCollector *troubleshootv1beta2.FilesystemPerformance) (*FioResult, error) {

	command, opts, err := parseCollectorOptions(hostCollector)

	if err != nil {
		return nil, errors.Wrap(err, "failed to parse collector options")
	}

	klog.V(2).Infof("collecting fio results: %s", strings.Join(command, " "))
	// ctx carries the collector timeout; CommandContext kills fio if it expires.
	output, err := exec.CommandContext(ctx, command[0], command[1:]...).Output()
	if err != nil {
		if exitErr, ok := err.(*exec.ExitError); ok {
			if exitErr.ExitCode() == 1 {
				// NOTE(review): exit status 1 is assumed to mean a permission
				// error opening the target directory; fio may exit 1 for other
				// failures too — confirm before relying on this message.
				return nil, errors.Wrapf(err, "fio failed; permission denied opening %s. ensure this collector runs as root", opts.Directory)
			} else {
				return nil, errors.Wrapf(err, "fio failed with exit status %d", exitErr.ExitCode())
			}
		} else if e, ok := err.(*exec.Error); ok && e.Err == exec.ErrNotFound {
			// The binary itself was not found on PATH.
			return nil, errors.Wrapf(err, "command not found: %v. ensure fio is installed", command)
		} else {
			return nil, errors.Wrapf(err, "failed to run command: %v", command)
		}
	}

	var result FioResult
	err = json.Unmarshal([]byte(output), &result)
	if err != nil {
		return nil, errors.Wrap(err, "failed to unmarshal fio result")
	}

	return &result, nil
}
|
||||
|
||||
@@ -10,36 +10,22 @@ import (
|
||||
"math/rand"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"sync"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
troubleshootv1beta2 "github.com/replicatedhq/troubleshoot/pkg/apis/troubleshoot/v1beta2"
|
||||
"k8s.io/apimachinery/pkg/api/resource"
|
||||
)
|
||||
|
||||
// Seed the global math/rand source so randomized write payloads differ
// between runs.
// NOTE(review): rand.Seed is deprecated as of Go 1.20 (the global source is
// auto-seeded); confirm the module's Go version before removing this.
func init() {
	rand.Seed(time.Now().UnixNano())
}
|
||||
|
||||
// Durations implements sort.Interface over latency samples so they can be
// ordered for percentile computation.
type Durations []time.Duration

// Len reports the number of samples.
func (d Durations) Len() int { return len(d) }

// Less orders samples ascending.
func (d Durations) Less(i, j int) bool { return d[i] < d[j] }

// Swap exchanges two samples in place.
func (d Durations) Swap(i, j int) { d[i], d[j] = d[j], d[i] }
|
||||
// Today we only care about checking for write latency so the options struct
|
||||
// only has what we need for that. we'll collect all the results from a single run of fio
|
||||
// and filter out the fsync results for analysis. TODO: update the analyzer so any/all results
|
||||
// from fio can be analyzed.
|
||||
|
||||
func collectHostFilesystemPerformance(hostCollector *troubleshootv1beta2.FilesystemPerformance, bundlePath string) (map[string][]byte, error) {
|
||||
timeout := time.Minute
|
||||
|
||||
if hostCollector.Timeout != "" {
|
||||
d, err := time.ParseDuration(hostCollector.Timeout)
|
||||
if err != nil {
|
||||
@@ -50,46 +36,15 @@ func collectHostFilesystemPerformance(hostCollector *troubleshootv1beta2.Filesys
|
||||
ctx, cancel := context.WithTimeout(context.Background(), timeout)
|
||||
defer cancel()
|
||||
|
||||
var operationSize uint64 = 1024
|
||||
if hostCollector.OperationSizeBytes != 0 {
|
||||
operationSize = hostCollector.OperationSizeBytes
|
||||
collectorName := hostCollector.CollectorName
|
||||
if collectorName == "" {
|
||||
collectorName = "filesystemPerformance"
|
||||
}
|
||||
name := filepath.Join("host-collectors/filesystemPerformance", collectorName+".json")
|
||||
|
||||
var fileSize uint64 = 10 * 1024 * 1024
|
||||
if hostCollector.FileSize != "" {
|
||||
quantity, err := resource.ParseQuantity(hostCollector.FileSize)
|
||||
if err != nil {
|
||||
return nil, errors.Wrapf(err, "failed to parse fileSize %q", hostCollector.FileSize)
|
||||
}
|
||||
fileSizeInt64, ok := quantity.AsInt64()
|
||||
if !ok {
|
||||
return nil, errors.Wrapf(err, "failed to parse fileSize %q", hostCollector.FileSize)
|
||||
}
|
||||
fileSize = uint64(fileSizeInt64)
|
||||
}
|
||||
|
||||
if hostCollector.Directory == "" {
|
||||
return nil, errors.New("Directory is required to collect filesystem performance info")
|
||||
}
|
||||
// TODO: clean up this directory if its created
|
||||
if err := os.MkdirAll(hostCollector.Directory, 0700); err != nil {
|
||||
return nil, errors.Wrapf(err, "failed to mkdir %q", hostCollector.Directory)
|
||||
}
|
||||
filename := filepath.Join(hostCollector.Directory, "fsperf")
|
||||
|
||||
f, err := os.OpenFile(filename, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0600)
|
||||
if err != nil {
|
||||
log.Panic(err)
|
||||
return nil, errors.Wrapf(err, "open %s", filename)
|
||||
}
|
||||
defer func() {
|
||||
if err := f.Close(); err != nil {
|
||||
log.Println(err.Error())
|
||||
}
|
||||
if err := os.Remove(filename); err != nil {
|
||||
log.Println(err.Error())
|
||||
}
|
||||
}()
|
||||
|
||||
// Start the background IOPS task and wait for warmup
|
||||
if hostCollector.EnableBackgroundIOPS {
|
||||
@@ -123,86 +78,16 @@ func collectHostFilesystemPerformance(hostCollector *troubleshootv1beta2.Filesys
|
||||
time.Sleep(time.Second * time.Duration(hostCollector.BackgroundIOPSWarmupSeconds))
|
||||
}
|
||||
|
||||
// Sequential writes benchmark
|
||||
var written uint64 = 0
|
||||
var results Durations
|
||||
var fioResult *FioResult
|
||||
|
||||
for {
|
||||
if written >= fileSize {
|
||||
break
|
||||
}
|
||||
|
||||
data := make([]byte, int(operationSize))
|
||||
rand.Read(data)
|
||||
|
||||
start := time.Now()
|
||||
|
||||
n, err := f.Write(data)
|
||||
if err != nil {
|
||||
return nil, errors.Wrapf(err, "write to %s", filename)
|
||||
}
|
||||
if hostCollector.Sync {
|
||||
if err := f.Sync(); err != nil {
|
||||
return nil, errors.Wrapf(err, "sync %s", filename)
|
||||
}
|
||||
} else if hostCollector.Datasync {
|
||||
if err := syscall.Fdatasync(int(f.Fd())); err != nil {
|
||||
return nil, errors.Wrapf(err, "datasync %s", filename)
|
||||
}
|
||||
}
|
||||
|
||||
d := time.Now().Sub(start)
|
||||
results = append(results, d)
|
||||
|
||||
written += uint64(n)
|
||||
|
||||
if ctx.Err() != nil {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if len(results) == 0 {
|
||||
return nil, errors.New("No filesystem performance results collected")
|
||||
}
|
||||
|
||||
sort.Sort(results)
|
||||
|
||||
var sum time.Duration
|
||||
for _, d := range results {
|
||||
sum += d
|
||||
}
|
||||
|
||||
fsPerf := &FSPerfResults{
|
||||
Min: results[0],
|
||||
Max: results[len(results)-1],
|
||||
Average: sum / time.Duration(len(results)),
|
||||
P1: results[getPercentileIndex(.01, len(results))],
|
||||
P5: results[getPercentileIndex(.05, len(results))],
|
||||
P10: results[getPercentileIndex(.1, len(results))],
|
||||
P20: results[getPercentileIndex(.2, len(results))],
|
||||
P30: results[getPercentileIndex(.3, len(results))],
|
||||
P40: results[getPercentileIndex(.4, len(results))],
|
||||
P50: results[getPercentileIndex(.5, len(results))],
|
||||
P60: results[getPercentileIndex(.6, len(results))],
|
||||
P70: results[getPercentileIndex(.7, len(results))],
|
||||
P80: results[getPercentileIndex(.8, len(results))],
|
||||
P90: results[getPercentileIndex(.9, len(results))],
|
||||
P95: results[getPercentileIndex(.95, len(results))],
|
||||
P99: results[getPercentileIndex(.99, len(results))],
|
||||
P995: results[getPercentileIndex(.995, len(results))],
|
||||
P999: results[getPercentileIndex(.999, len(results))],
|
||||
P9995: results[getPercentileIndex(.9995, len(results))],
|
||||
P9999: results[getPercentileIndex(.9999, len(results))],
|
||||
}
|
||||
|
||||
collectorName := hostCollector.CollectorName
|
||||
if collectorName == "" {
|
||||
collectorName = "filesystemPerformance"
|
||||
}
|
||||
name := filepath.Join("host-collectors/filesystemPerformance", collectorName+".json")
|
||||
b, err := json.Marshal(fsPerf)
|
||||
fioResult, err := collectFioResults(ctx, hostCollector)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "failed to marshal fs perf results")
|
||||
return nil, errors.Wrap(err, "failed to collect fio results")
|
||||
}
|
||||
|
||||
b, err := json.Marshal(fioResult)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "failed to unmarshal fio results")
|
||||
}
|
||||
|
||||
output := NewResult()
|
||||
|
||||
@@ -2,7 +2,10 @@ package collect
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
troubleshootv1beta2 "github.com/replicatedhq/troubleshoot/pkg/apis/troubleshoot/v1beta2"
|
||||
)
|
||||
|
||||
func TestGetPercentileIndex(t *testing.T) {
|
||||
@@ -57,3 +60,128 @@ func TestGetPercentileIndex(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// Test_parseCollectorOptions covers spec-to-fio-command translation:
// a fully-populated spec (including background IOPS fields, which do not
// affect the generated command), an empty spec, an unparsable fileSize,
// and a missing directory.
func Test_parseCollectorOptions(t *testing.T) {
	type args struct {
		hostCollector *troubleshootv1beta2.FilesystemPerformance
	}
	tests := []struct {
		name        string
		args        args
		wantCommand []string
		wantOptions *FioJobOptions
		wantErr     bool
	}{
		{
			name: "Happy spec",
			args: args{
				hostCollector: &troubleshootv1beta2.FilesystemPerformance{
					HostCollectorMeta: troubleshootv1beta2.HostCollectorMeta{
						CollectorName: "fsperf",
					},
					OperationSizeBytes:          1024,
					Directory:                   "/var/lib/etcd",
					FileSize:                    "22Mi",
					Sync:                        true,
					Datasync:                    true,
					Timeout:                     "120",
					EnableBackgroundIOPS:        true,
					BackgroundIOPSWarmupSeconds: 10,
					BackgroundWriteIOPS:         100,
					BackgroundReadIOPS:          100,
					BackgroundWriteIOPSJobs:     1,
					BackgroundReadIOPSJobs:      1,
				},
			},
			// 22Mi == 23068672 bytes; flags are lowercased field names.
			wantCommand: []string{
				"fio",
				"--name=fsperf",
				"--bs=1024",
				"--directory=/var/lib/etcd",
				"--rw=write",
				"--ioengine=sync",
				"--fdatasync=1",
				"--size=23068672",
				"--runtime=120",
				"--output-format=json",
			},
			wantOptions: &FioJobOptions{
				RW:        "write",
				IOEngine:  "sync",
				FDataSync: "1",
				Directory: "/var/lib/etcd",
				Size:      "23068672",
				BS:        "1024",
				Name:      "fsperf",
				RunTime:   "120",
			},
			wantErr: false,
		},
		{
			// No Directory set -> error.
			name: "Empty spec fails",
			args: args{
				hostCollector: &troubleshootv1beta2.FilesystemPerformance{
					HostCollectorMeta: troubleshootv1beta2.HostCollectorMeta{
						CollectorName: "fsperf",
					},
				},
			},
			wantCommand: nil,
			wantOptions: nil,
			wantErr:     true,
		},
		{
			// FileSize that is not a resource quantity -> error.
			name: "Invalid filesize",
			args: args{
				hostCollector: &troubleshootv1beta2.FilesystemPerformance{
					HostCollectorMeta: troubleshootv1beta2.HostCollectorMeta{
						CollectorName: "fsperf",
					},
					OperationSizeBytes: 1024,
					Directory:          "/var/lib/etcd",
					FileSize:           "abcd",
					Sync:               true,
					Datasync:           true,
					Timeout:            "120",
				},
			},
			wantCommand: nil,
			wantOptions: nil,
			wantErr:     true,
		},
		{
			// Explicitly empty Directory -> error.
			name: "invalid path parameter",
			args: args{
				hostCollector: &troubleshootv1beta2.FilesystemPerformance{
					HostCollectorMeta: troubleshootv1beta2.HostCollectorMeta{
						CollectorName: "fsperf",
					},
					OperationSizeBytes: 1024,
					Directory:          "",
					FileSize:           "22Mi",
					Sync:               true,
					Datasync:           true,
					Timeout:            "120",
				},
			},
			wantCommand: nil,
			wantOptions: nil,
			wantErr:     true,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			gotCommand, gotOptions, err := parseCollectorOptions(tt.args.hostCollector)
			if (err != nil) != tt.wantErr {
				t.Errorf("parseCollectorOptions() error = %v, wantErr %v", err, tt.wantErr)
			} else {
				// Only compare outputs when the error expectation matched.
				if !reflect.DeepEqual(gotCommand, tt.wantCommand) {
					t.Errorf("parseCollectorOptions() got command = %v, want %v", gotCommand, tt.wantCommand)
				}
				if !reflect.DeepEqual(gotOptions, tt.wantOptions) {
					t.Errorf("parseCollectorOptions() got options = %v, want %v", gotOptions, tt.wantOptions)
				}
			}
		})
	}
}
|
||||
|
||||
19
testdata/filesystem_performance_preflight.yaml
vendored
Normal file
19
testdata/filesystem_performance_preflight.yaml
vendored
Normal file
@@ -0,0 +1,19 @@
|
||||
apiVersion: troubleshoot.sh/v1beta2
|
||||
kind: HostPreflight
|
||||
metadata:
|
||||
name: sample
|
||||
spec:
|
||||
collectors:
|
||||
- filesystemPerformance:
|
||||
collectorName: Filesystem Latency Two Minute Benchmark
|
||||
timeout: 2m
|
||||
directory: /var/lib/etcd
|
||||
fileSize: 22Mi
|
||||
operationSizeBytes: 2300
|
||||
datasync: true
|
||||
enableBackgroundIOPS: true
|
||||
backgroundIOPSWarmupSeconds: 10
|
||||
backgroundWriteIOPS: 300
|
||||
backgroundWriteIOPSJobs: 6
|
||||
backgroundReadIOPS: 50
|
||||
backgroundReadIOPSJobs: 1
|
||||
552
testdata/kurl_preflights.yaml
vendored
Normal file
552
testdata/kurl_preflights.yaml
vendored
Normal file
@@ -0,0 +1,552 @@
|
||||
# https://kurl.sh/docs/install-with-kurl/system-requirements
|
||||
apiVersion: troubleshoot.sh/v1beta2
|
||||
kind: HostPreflight
|
||||
metadata:
|
||||
name: kurl-builtin
|
||||
spec:
|
||||
collectors:
|
||||
- time: {}
|
||||
- cpu: {}
|
||||
- memory: {}
|
||||
- hostServices: {}
|
||||
- hostOS: {}
|
||||
- diskUsage:
|
||||
collectorName: "Ephemeral Disk Usage /var/lib/kubelet"
|
||||
path: /var/lib/kubelet
|
||||
- diskUsage:
|
||||
collectorName: "Ephemeral Disk Usage /var/lib/docker"
|
||||
path: /var/lib/docker
|
||||
exclude: '{{kurl not .Installer.Spec.Docker.Version}}'
|
||||
- diskUsage:
|
||||
collectorName: "Ephemeral Disk Usage /var/lib/containerd"
|
||||
path: /var/lib/containerd
|
||||
exclude: '{{kurl not .Installer.Spec.Containerd.Version}}'
|
||||
- diskUsage:
|
||||
collectorName: "Ephemeral Disk Usage /var/lib/rook"
|
||||
path: /var/lib/rook
|
||||
exclude: '{{kurl not .Installer.Spec.Rook.Version}}'
|
||||
- diskUsage:
|
||||
collectorName: "Ephemeral Disk Usage /var/openebs"
|
||||
path: /var/openebs
|
||||
exclude: '{{kurl not .Installer.Spec.OpenEBS.Version}}'
|
||||
- tcpLoadBalancer:
|
||||
collectorName: "Kubernetes API Server Load Balancer"
|
||||
port: 6443
|
||||
address: {{kurl .Installer.Spec.Kubernetes.LoadBalancerAddress }}
|
||||
timeout: 3m
|
||||
# ha and is first master (primary and not join) and not is upgrade
|
||||
exclude: '{{kurl and .Installer.Spec.Kubernetes.Version .Installer.Spec.Kubernetes.LoadBalancerAddress .IsPrimary (not .IsJoin) (not .IsUpgrade) | not }}'
|
||||
- http:
|
||||
collectorName: "Kubernetes API Server Load Balancer Upgrade"
|
||||
get:
|
||||
url: https://{{kurl .Installer.Spec.Kubernetes.LoadBalancerAddress | trimSuffix "/" }}/healthz
|
||||
insecureSkipVerify: true
|
||||
# ha and is first master (primary and not join) and is upgrade (the load balancer backend should already be available)
|
||||
exclude: '{{kurl and .Installer.Spec.Kubernetes.Version .Installer.Spec.Kubernetes.LoadBalancerAddress .IsPrimary .IsUpgrade (not .IsJoin) | not }}'
|
||||
- tcpPortStatus:
|
||||
collectorName: "Kubernetes API TCP Port Status"
|
||||
port: 6443
|
||||
exclude: '{{kurl and .IsPrimary (not .IsUpgrade) | not }}'
|
||||
- tcpPortStatus:
|
||||
collectorName: "ETCD Client API TCP Port Status"
|
||||
port: 2379
|
||||
exclude: '{{kurl and .IsPrimary (not .IsUpgrade) | not }}'
|
||||
- tcpPortStatus:
|
||||
collectorName: "ETCD Server API TCP Port Status"
|
||||
port: 2380
|
||||
exclude: '{{kurl and .IsPrimary (not .IsUpgrade) | not }}'
|
||||
- tcpPortStatus:
|
||||
collectorName: "ETCD Health Server TCP Port Status"
|
||||
port: 2381
|
||||
exclude: '{{kurl and .IsPrimary (not .IsUpgrade) | not }}'
|
||||
interface: lo
|
||||
- tcpPortStatus:
|
||||
collectorName: "Kubelet Health Server TCP Port Status"
|
||||
port: 10248
|
||||
exclude: '{{kurl and (not .IsUpgrade) | not }}'
|
||||
interface: lo
|
||||
- tcpPortStatus:
|
||||
collectorName: "Kubelet API TCP Port Status"
|
||||
port: 10250
|
||||
exclude: '{{kurl and (not .IsUpgrade) | not }}'
|
||||
- tcpPortStatus:
|
||||
collectorName: "Kube Controller Manager Health Server TCP Port Status"
|
||||
port: 10257
|
||||
exclude: '{{kurl and .IsPrimary (not .IsUpgrade) | not }}'
|
||||
interface: lo
|
||||
- tcpPortStatus:
|
||||
collectorName: "Kube Scheduler Health Server TCP Port Status"
|
||||
port: 10259
|
||||
exclude: '{{kurl and .IsPrimary (not .IsUpgrade) | not }}'
|
||||
interface: lo
|
||||
- tcpConnect:
|
||||
collectorName: "Kubernetes API TCP Connection Status"
|
||||
address: '{{kurl .Installer.Spec.Kubernetes.MasterAddress }}'
|
||||
# run the collector if 1. there is a master address set AND this is a node joining the cluster AND this is not an EKCO internalLB install
|
||||
exclude: '{{kurl and .Installer.Spec.Kubernetes.Version .Installer.Spec.Kubernetes.MasterAddress .IsJoin (and .Installer.Spec.Ekco.Version .Installer.Spec.Ekco.EnableInternalLoadBalancer | not) | not }}'
|
||||
- filesystemPerformance:
|
||||
collectorName: Filesystem Latency Two Minute Benchmark
|
||||
exclude: '{{kurl and .IsPrimary (not .IsUpgrade) | not }}'
|
||||
timeout: 2m
|
||||
directory: /var/lib/etcd
|
||||
fileSize: 22Mi
|
||||
operationSizeBytes: 2300
|
||||
datasync: true
|
||||
enableBackgroundIOPS: true
|
||||
backgroundIOPSWarmupSeconds: 10
|
||||
backgroundWriteIOPS: 300
|
||||
backgroundWriteIOPSJobs: 6
|
||||
backgroundReadIOPS: 50
|
||||
backgroundReadIOPSJobs: 1
|
||||
- certificate:
|
||||
collectorName: "Kubernetes API key pair certificate"
|
||||
exclude: '{{kurl or (not .IsPrimary) (not .IsUpgrade) }}'
|
||||
certificatePath: /etc/kubernetes/pki/apiserver.crt
|
||||
keyPath: /etc/kubernetes/pki/apiserver.key
|
||||
- certificate:
|
||||
collectorName: "Kubernetes ETCD key pair certificate"
|
||||
exclude: '{{kurl or (not .IsPrimary) (not .IsUpgrade) }}'
|
||||
certificatePath: /etc/kubernetes/pki/etcd/server.crt
|
||||
keyPath: /etc/kubernetes/pki/etcd/server.key
|
||||
- http:
|
||||
collectorName: "Kubernetes API Health"
|
||||
exclude: '{{kurl or (not .IsPrimary) (not .IsUpgrade) }}'
|
||||
get:
|
||||
url: https://localhost:6443/healthz
|
||||
insecureSkipVerify: true
|
||||
analyzers:
|
||||
- certificate:
|
||||
collectorName: "Kubernetes API key pair certificate"
|
||||
exclude: '{{kurl or (not .IsPrimary) (not .IsUpgrade) }}'
|
||||
outcomes:
|
||||
- fail:
|
||||
when: "key-pair-missing"
|
||||
message: Kubernetes API key pair certificate not found in /etc/kubernetes/pki/apiserver.*
|
||||
- fail:
|
||||
when: "key-pair-switched"
|
||||
message: Kubernetes API key pair certificate and key pair are switched
|
||||
- fail:
|
||||
when: "key-pair-encrypted"
|
||||
message: Kubernetes API key pair certificate private key is encrypted
|
||||
- fail:
|
||||
when: "key-pair-mismatch"
|
||||
message: Kubernetes API key pair certificate and key do not match
|
||||
- fail:
|
||||
when: "key-pair-invalid"
|
||||
message: Kubernetes API key pair certificate is invalid
|
||||
- pass:
|
||||
when: "key-pair-valid"
|
||||
message: Kubernetes API key pair certificate is valid
|
||||
- certificate:
|
||||
collectorName: "Kubernetes ETCD key pair certificate"
|
||||
exclude: '{{kurl or (not .IsPrimary) (not .IsUpgrade) }}'
|
||||
outcomes:
|
||||
- fail:
|
||||
when: "key-pair-missing"
|
||||
message: Kubernetes ETCD key pair certificate not found in /etc/kubernetes/pki/etcd/server.*
|
||||
- fail:
|
||||
when: "key-pair-switched"
|
||||
message: Kubernetes ETCD certificate and key pair are switched
|
||||
- fail:
|
||||
when: "key-pair-encrypted"
|
||||
message: Kubernetes ETCD certificate private key is encrypted
|
||||
- fail:
|
||||
when: "key-pair-mismatch"
|
||||
message: Kubernetes ETCD certificate and key do not match
|
||||
- fail:
|
||||
when: "key-pair-invalid"
|
||||
message: Kubernetes ETCD key pair certificate is invalid
|
||||
- pass:
|
||||
when: "key-pair-valid"
|
||||
message: Kubernetes ETCD key pair certificate is valid
|
||||
- http:
|
||||
checkName: "Kubernetes API Health"
|
||||
exclude: '{{kurl or (not .IsPrimary) (not .IsUpgrade) }}'
|
||||
collectorName: "Kubernetes API Health"
|
||||
outcomes:
|
||||
- warn:
|
||||
when: "error"
|
||||
message: Error connecting to Kubernetes API at https://localhost:6443/healthz
|
||||
- pass:
|
||||
when: "statusCode == 200"
|
||||
message: OK HTTP status response from Kubernetes API at https://localhost:6443/healthz
|
||||
- warn:
|
||||
message: Unexpected status code response from Kubernetes API at https://localhost:6443/healthz
|
||||
- cpu:
|
||||
checkName: "Number of CPUs"
|
||||
outcomes:
|
||||
- fail:
|
||||
when: "count < 2"
|
||||
message: At least 2 CPU cores are required, and 4 CPU cores are recommended
|
||||
- warn:
|
||||
when: "count < 4"
|
||||
message: At least 4 CPU cores are recommended
|
||||
- pass:
|
||||
message: This server has at least 4 CPU cores
|
||||
- memory:
|
||||
checkName: "Amount of Memory"
|
||||
outcomes:
|
||||
- fail:
|
||||
when: "< 4G"
|
||||
message: At least 4G of memory is required, and 8G of memory is recommended
|
||||
- warn:
|
||||
when: "< 8G"
|
||||
message: At least 8G of memory is recommended
|
||||
- pass:
|
||||
message: The system has at least 8G of memory
|
||||
- diskUsage:
|
||||
checkName: "Ephemeral Disk Usage /var/lib/kubelet"
|
||||
collectorName: "Ephemeral Disk Usage /var/lib/kubelet"
|
||||
outcomes:
|
||||
- fail:
|
||||
when: "total < 30Gi"
|
||||
message: The disk containing directory /var/lib/kubelet has less than 30Gi of total space
|
||||
- fail:
|
||||
when: "used/total > 80%"
|
||||
message: The disk containing directory /var/lib/kubelet is more than 80% full
|
||||
- warn:
|
||||
when: "used/total > 60%"
|
||||
message: The disk containing directory /var/lib/kubelet is more than 60% full
|
||||
- warn:
|
||||
when: "available < 10Gi"
|
||||
message: The disk containing directory /var/lib/kubelet has less than 10Gi of disk space available
|
||||
- pass:
|
||||
message: The disk containing directory /var/lib/kubelet has at least 30Gi of total space, has at least 10Gi of disk space available, and is less than 60% full
|
||||
- diskUsage:
|
||||
checkName: "Ephemeral Disk Usage /var/lib/docker"
|
||||
collectorName: "Ephemeral Disk Usage /var/lib/docker"
|
||||
exclude: '{{kurl not .Installer.Spec.Docker.Version}}'
|
||||
outcomes:
|
||||
- fail:
|
||||
when: "total < 30Gi"
|
||||
message: The disk containing directory /var/lib/docker has less than 30Gi of total space
|
||||
- fail:
|
||||
when: "used/total > 80%"
|
||||
message: The disk containing directory /var/lib/docker is more than 80% full
|
||||
- warn:
|
||||
when: "used/total > 60%"
|
||||
message: The disk containing directory /var/lib/docker is more than 60% full
|
||||
- warn:
|
||||
when: "available < 10Gi"
|
||||
message: The disk containing directory /var/lib/docker has less than 10Gi of disk space available
|
||||
- pass:
|
||||
message: The disk containing directory /var/lib/docker has at least 30Gi of total space, has at least 10Gi of disk space available, and is less than 60% full
|
||||
- diskUsage:
|
||||
checkName: "Ephemeral Disk Usage /var/lib/containerd"
|
||||
collectorName: "Ephemeral Disk Usage /var/lib/containerd"
|
||||
exclude: '{{kurl not .Installer.Spec.Containerd.Version}}'
|
||||
outcomes:
|
||||
- fail:
|
||||
when: "total < 30Gi"
|
||||
message: The disk containing directory /var/lib/containerd has less than 30Gi of total space
|
||||
- fail:
|
||||
when: "used/total > 80%"
|
||||
message: The disk containing directory /var/lib/containerd is more than 80% full
|
||||
- warn:
|
||||
when: "used/total > 60%"
|
||||
message: The disk containing directory /var/lib/containerd is more than 60% full
|
||||
- warn:
|
||||
when: "available < 10Gi"
|
||||
message: The disk containing directory /var/lib/containerd has less than 10Gi of disk space available
|
||||
- pass:
|
||||
message: The disk containing directory /var/lib/containerd has at least 30Gi of total space, has at least 10Gi of disk space available, and is less than 60% full
|
||||
- diskUsage:
|
||||
checkName: "Ephemeral Disk Usage /var/lib/rook"
|
||||
collectorName: "Ephemeral Disk Usage /var/lib/rook"
|
||||
exclude: '{{kurl not .Installer.Spec.Rook.Version}}'
|
||||
outcomes:
|
||||
- fail:
|
||||
when: "used/total > 80%"
|
||||
message: The disk containing directory /var/lib/rook is more than 80% full
|
||||
- fail:
|
||||
when: "available < 10Gi"
|
||||
message: The disk containing directory /var/lib/rook has less than 10Gi of disk space available
|
||||
- pass:
|
||||
message: The disk containing directory /var/lib/rook has sufficient space
|
||||
- diskUsage:
|
||||
checkName: "Ephemeral Disk Usage /var/openebs"
|
||||
collectorName: "Ephemeral Disk Usage /var/openebs"
|
||||
exclude: '{{kurl not .Installer.Spec.OpenEBS.Version}}'
|
||||
outcomes:
|
||||
- warn:
|
||||
when: "used/total > 80%"
|
||||
message: The disk containing directory /var/openebs is more than 80% full
|
||||
- warn:
|
||||
when: "available < 10Gi"
|
||||
message: The disk containing directory /var/openebs has less than 10Gi of disk space available
|
||||
- pass:
|
||||
message: The disk containing directory /var/openebs has sufficient space
|
||||
- tcpLoadBalancer:
|
||||
checkName: "Kubernetes API Server Load Balancer"
|
||||
collectorName: "Kubernetes API Server Load Balancer"
|
||||
# ha and is first master (primary and not join) and not is upgrade
|
||||
exclude: '{{kurl and .Installer.Spec.Kubernetes.Version .Installer.Spec.Kubernetes.LoadBalancerAddress .IsPrimary (not .IsJoin) (not .IsUpgrade) | not }}'
|
||||
outcomes:
|
||||
- fail:
|
||||
when: "invalid-address"
|
||||
message: The load balancer address {{kurl .Installer.Spec.Kubernetes.LoadBalancerAddress }} is not valid.
|
||||
- warn:
|
||||
when: "connection-refused"
|
||||
message: Connection to {{kurl .Installer.Spec.Kubernetes.LoadBalancerAddress }} via load balancer was refused.
|
||||
- warn:
|
||||
when: "connection-timeout"
|
||||
message: Timed out connecting to {{kurl .Installer.Spec.Kubernetes.LoadBalancerAddress }} via load balancer. Check your firewall.
|
||||
- warn:
|
||||
when: "error"
|
||||
message: Unexpected port status
|
||||
- warn:
|
||||
when: "address-in-use"
|
||||
message: Port 6443 is unavailable
|
||||
- pass:
|
||||
when: "connected"
|
||||
message: Successfully connected to {{kurl .Installer.Spec.Kubernetes.LoadBalancerAddress }} via load balancer
|
||||
- warn:
|
||||
message: Unexpected port status
|
||||
- http:
|
||||
checkName: "Kubernetes API Server Load Balancer Upgrade"
|
||||
collectorName: "Kubernetes API Server Load Balancer Upgrade"
|
||||
exclude: '{{kurl and .Installer.Spec.Kubernetes.Version .Installer.Spec.Kubernetes.LoadBalancerAddress .IsPrimary .IsUpgrade (not .IsJoin) | not }}'
|
||||
outcomes:
|
||||
- fail:
|
||||
when: "error"
|
||||
message: Error connecting to load balancer at https://{{kurl .Installer.Spec.Kubernetes.LoadBalancerAddress }}/healthz
|
||||
- pass:
|
||||
when: "statusCode == 200"
|
||||
message: OK HTTP status response from load balancer at https://{{kurl .Installer.Spec.Kubernetes.LoadBalancerAddress }}/healthz
|
||||
- fail:
|
||||
message: Unexpected status code response from load balancer at https://{{kurl .Installer.Spec.Kubernetes.LoadBalancerAddress }}/healthz
|
||||
- tcpPortStatus:
|
||||
checkName: "Kubernetes API TCP Port Status"
|
||||
collectorName: "Kubernetes API TCP Port Status"
|
||||
exclude: '{{kurl and .IsPrimary (not .IsUpgrade) | not }}'
|
||||
outcomes:
|
||||
- fail:
|
||||
when: "connection-refused"
|
||||
message: Connection to port 6443 was refused. This is likely to be a routing problem since this preflight configures a test server to listen on this port.
|
||||
- warn:
|
||||
when: "address-in-use"
|
||||
message: Another process was already listening on port 6443.
|
||||
- fail:
|
||||
when: "connection-timeout"
|
||||
message: Timed out connecting to port 6443. Check your firewall.
|
||||
- fail:
|
||||
when: "error"
|
||||
message: Unexpected port status
|
||||
- pass:
|
||||
when: "connected"
|
||||
message: Port 6443 is open
|
||||
- warn:
|
||||
message: Unexpected port status
|
||||
- tcpPortStatus:
|
||||
checkName: "ETCD Client API TCP Port Status"
|
||||
collectorName: "ETCD Client API TCP Port Status"
|
||||
exclude: '{{kurl and .IsPrimary (not .IsUpgrade) | not }}'
|
||||
outcomes:
|
||||
- fail:
|
||||
when: "connection-refused"
|
||||
message: Connection to port 2379 was refused. This is likely to be a routing problem since this preflight configures a test server to listen on this port.
|
||||
- warn:
|
||||
when: "address-in-use"
|
||||
message: Another process was already listening on port 2379.
|
||||
- fail:
|
||||
when: "connection-timeout"
|
||||
message: Timed out connecting to port 2379. Check your firewall.
|
||||
- fail:
|
||||
when: "error"
|
||||
message: Unexpected port status
|
||||
- pass:
|
||||
when: "connected"
|
||||
message: Port 2379 is open
|
||||
- warn:
|
||||
message: Unexpected port status
|
||||
- tcpPortStatus:
|
||||
checkName: "ETCD Server API TCP Port Status"
|
||||
collectorName: "ETCD Server API TCP Port Status"
|
||||
exclude: '{{kurl and .IsPrimary (not .IsUpgrade) | not }}'
|
||||
outcomes:
|
||||
- fail:
|
||||
when: "connection-refused"
|
||||
message: Connection to port 2380 was refused. This is likely to be a routing problem since this preflight configures a test server to listen on this port.
|
||||
- warn:
|
||||
when: "address-in-use"
|
||||
message: Another process was already listening on port 2380.
|
||||
- fail:
|
||||
when: "connection-timeout"
|
||||
message: Timed out connecting to port 2380. Check your firewall.
|
||||
- fail:
|
||||
when: "error"
|
||||
message: Unexpected port status
|
||||
- pass:
|
||||
when: "connected"
|
||||
message: Port 2380 is open
|
||||
- warn:
|
||||
message: Unexpected port status
|
||||
- tcpPortStatus:
|
||||
checkName: "ETCD Health Server TCP Port Status"
|
||||
collectorName: "ETCD Health Server TCP Port Status"
|
||||
exclude: '{{kurl and .IsPrimary (not .IsUpgrade) | not }}'
|
||||
outcomes:
|
||||
- fail:
|
||||
when: "connection-refused"
|
||||
message: Connection to port 2381 was refused. This is likely to be a routing problem since this preflight configures a test server to listen on this port.
|
||||
- warn:
|
||||
when: "address-in-use"
|
||||
message: Another process was already listening on port 2381.
|
||||
- fail:
|
||||
when: "connection-timeout"
|
||||
message: Timed out connecting to port 2381. Check your firewall.
|
||||
- fail:
|
||||
when: "error"
|
||||
message: Unexpected port status
|
||||
- pass:
|
||||
when: "connected"
|
||||
message: Port 2381 is available
|
||||
- warn:
|
||||
message: Unexpected port status
|
||||
- tcpPortStatus:
|
||||
checkName: "Kubelet Health Server TCP Port Status"
|
||||
collectorName: "Kubelet Health Server TCP Port Status"
|
||||
exclude: '{{kurl and (not .IsUpgrade) | not }}'
|
||||
outcomes:
|
||||
- fail:
|
||||
when: "connection-refused"
|
||||
message: Connection to port 10248 was refused. This is likely to be a routing problem since this preflight configures a test server to listen on this port.
|
||||
- warn:
|
||||
when: "address-in-use"
|
||||
message: Another process was already listening on port 10248.
|
||||
- fail:
|
||||
when: "connection-timeout"
|
||||
message: Timed out connecting to port 10248. Check your firewall.
|
||||
- fail:
|
||||
when: "error"
|
||||
message: Unexpected port status
|
||||
- pass:
|
||||
when: "connected"
|
||||
message: Port 10248 is available
|
||||
- warn:
|
||||
message: Unexpected port status
|
||||
- tcpPortStatus:
|
||||
checkName: "Kubelet API TCP Port Status"
|
||||
collectorName: "Kubelet API TCP Port Status"
|
||||
exclude: '{{kurl and (not .IsUpgrade) | not }}'
|
||||
outcomes:
|
||||
- fail:
|
||||
when: "connection-refused"
|
||||
message: Connection to port 10250 was refused. This is likely to be a routing problem since this preflight configures a test server to listen on this port.
|
||||
- warn:
|
||||
when: "address-in-use"
|
||||
message: Another process was already listening on port 10250.
|
||||
- fail:
|
||||
when: "connection-timeout"
|
||||
message: Timed out connecting to port 10250. Check your firewall.
|
||||
- fail:
|
||||
when: "error"
|
||||
message: Unexpected port status
|
||||
- pass:
|
||||
when: "connected"
|
||||
message: Port 10250 is open
|
||||
- warn:
|
||||
message: Unexpected port status
|
||||
- tcpPortStatus:
|
||||
checkName: "Kube Controller Manager Health Server TCP Port Status"
|
||||
collectorName: "Kube Controller Manager Health Server TCP Port Status"
|
||||
exclude: '{{kurl and .IsPrimary (not .IsUpgrade) | not }}'
|
||||
outcomes:
|
||||
- fail:
|
||||
when: "connection-refused"
|
||||
message: Connection to port 10257 was refused. This is likely to be a routing problem since this preflight configures a test server to listen on this port.
|
||||
- warn:
|
||||
when: "address-in-use"
|
||||
message: Another process was already listening on port 10257.
|
||||
- fail:
|
||||
when: "connection-timeout"
|
||||
message: Timed out connecting to port 10257. Check your firewall.
|
||||
- fail:
|
||||
when: "error"
|
||||
message: Unexpected port status
|
||||
- pass:
|
||||
when: "connected"
|
||||
message: Port 10257 is available
|
||||
- warn:
|
||||
message: Unexpected port status
|
||||
- tcpPortStatus:
|
||||
checkName: "Kube Scheduler Health Server TCP Port Status"
|
||||
collectorName: "Kube Scheduler Health Server TCP Port Status"
|
||||
exclude: '{{kurl and .IsPrimary (not .IsUpgrade) | not }}'
|
||||
outcomes:
|
||||
- fail:
|
||||
when: "connection-refused"
|
||||
message: Connection to port 10259 was refused. This is likely to be a routing problem since this preflight configures a test server to listen on this port.
|
||||
- warn:
|
||||
when: "address-in-use"
|
||||
message: Another process was already listening on port 10259.
|
||||
- fail:
|
||||
when: "connection-timeout"
|
||||
message: Timed out connecting to port 10259. Check your firewall.
|
||||
- fail:
|
||||
when: "error"
|
||||
message: Unexpected port status
|
||||
- pass:
|
||||
when: "connected"
|
||||
message: Port 10259 is available
|
||||
- warn:
|
||||
message: Unexpected port status
|
||||
- tcpConnect:
|
||||
checkName: "Kubernetes API TCP Connection Status"
|
||||
collectorName: "Kubernetes API TCP Connection Status"
|
||||
# run the analyzer if 1. there is a master address set AND this is a node joining the cluster AND this is not an EKCO internalLB install
|
||||
exclude: '{{kurl and .Installer.Spec.Kubernetes.Version .Installer.Spec.Kubernetes.MasterAddress .IsJoin (and .Installer.Spec.Ekco.Version .Installer.Spec.Ekco.EnableInternalLoadBalancer | not) | not }}'
|
||||
outcomes:
|
||||
- fail:
|
||||
when: "connection-refused"
|
||||
message: Connection to the Kubernetes API at address {{kurl .Installer.Spec.Kubernetes.MasterAddress }} was refused
|
||||
- fail:
|
||||
when: "connection-timeout"
|
||||
message: Timed out connecting to the Kubernetes API at address {{kurl .Installer.Spec.Kubernetes.MasterAddress }}
|
||||
- fail:
|
||||
when: "error"
|
||||
message: Unexpected error connecting to the Kubernetes API at address {{kurl .Installer.Spec.Kubernetes.MasterAddress }}
|
||||
- pass:
|
||||
when: "connected"
|
||||
message: Successfully connected to the Kubernetes API at address {{kurl .Installer.Spec.Kubernetes.MasterAddress }}
|
||||
- filesystemPerformance:
|
||||
collectorName: Filesystem Latency Two Minute Benchmark
|
||||
exclude: '{{kurl and .IsPrimary (not .IsUpgrade) | not }}'
|
||||
outcomes:
|
||||
- pass:
|
||||
when: "p99 < 10ms"
|
||||
message: "Write latency is ok (p99 target < 10ms, actual: {{ .P99 }})"
|
||||
- warn:
|
||||
message: "Write latency is high. p99 target < 10ms, actual: {{ .String }}"
|
||||
- time:
|
||||
checkName: "NTP Status"
|
||||
outcomes:
|
||||
- fail:
|
||||
when: "ntp == unsynchronized+inactive"
|
||||
message: "System clock is not synchronized"
|
||||
- warn:
|
||||
when: "ntp == unsynchronized+active"
|
||||
message: System clock is not yet synchronized
|
||||
- pass:
|
||||
when: "ntp == synchronized+active"
|
||||
message: "System clock is synchronized"
|
||||
- warn:
|
||||
when: "timezone != UTC"
|
||||
message: "Non UTC timezone can interfere with system function"
|
||||
- pass:
|
||||
when: "timezone == UTC"
|
||||
message: "Timezone is set to UTC"
|
||||
- hostOS:
|
||||
checkName: "Docker Support"
|
||||
exclude: '{{kurl or (not .Installer.Spec.Docker.Version) (semverCompare ">= 20.10.17" .Installer.Spec.Docker.Version) }}'
|
||||
outcomes:
|
||||
- fail:
|
||||
when: "ubuntu = 22.04"
|
||||
message: "Docker versions < 20.10.17 not supported on ubuntu 22.04"
|
||||
# hijack hostOS analyzer in order to analyze the kURL Installer spec
|
||||
- hostOS:
|
||||
checkName: "Containerd and Weave Compatibility"
|
||||
exclude: '{{kurl or (not .Installer.Spec.Weave.Version) (not .Installer.Spec.Containerd.Version) (semverCompare "1.6.0 - 1.6.4" .Installer.Spec.Containerd.Version | not) }}'
|
||||
outcomes:
|
||||
- fail:
|
||||
message: "Weave is not compatible with containerd versions 1.6.0 - 1.6.4"
|
||||
Reference in New Issue
Block a user