mirror of
https://github.com/replicatedhq/troubleshoot.git
synced 2026-04-15 07:16:34 +00:00
Merge pull request #575 from replicatedhq/diamonwiggins/sc-40946/ceph-analyzer-health-status
Ceph health status messages in Analyzer result
This commit is contained in:
@@ -74,7 +74,17 @@ type CephStatus struct {
|
||||
}
|
||||
|
||||
type HealthStatus struct {
|
||||
Status string `json:"status"`
|
||||
Status string `json:"status"`
|
||||
Checks map[string]CheckMessage `json:"checks"`
|
||||
}
|
||||
|
||||
type CheckMessage struct {
|
||||
Severity string `json:"severity"`
|
||||
Summary Summary `json:"summary"`
|
||||
}
|
||||
|
||||
type Summary struct {
|
||||
Message string `json:"message"`
|
||||
}
|
||||
|
||||
type OsdMap struct {
|
||||
@@ -121,6 +131,7 @@ func cephStatus(analyzer *troubleshootv1beta2.CephStatusAnalyze, getCollectedFil
|
||||
if outcome.Fail.When == "" {
|
||||
outcome.Fail.When = string(CephHealthErr)
|
||||
}
|
||||
|
||||
match, err := compareCephStatus(status.Health.Status, outcome.Fail.When)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "failed to compare ceph status")
|
||||
@@ -134,6 +145,7 @@ func cephStatus(analyzer *troubleshootv1beta2.CephStatusAnalyze, getCollectedFil
|
||||
if outcome.Warn.When == "" {
|
||||
outcome.Warn.When = string(CephHealthWarn)
|
||||
}
|
||||
|
||||
match, err := compareCephStatus(status.Health.Status, outcome.Warn.When)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "failed to compare ceph status")
|
||||
@@ -147,6 +159,7 @@ func cephStatus(analyzer *troubleshootv1beta2.CephStatusAnalyze, getCollectedFil
|
||||
if outcome.Pass.When == "" {
|
||||
outcome.Pass.When = string(CephHealthOK)
|
||||
}
|
||||
|
||||
match, err := compareCephStatus(status.Health.Status, outcome.Pass.When)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "failed to compare ceph status")
|
||||
@@ -154,6 +167,7 @@ func cephStatus(analyzer *troubleshootv1beta2.CephStatusAnalyze, getCollectedFil
|
||||
analyzeResult.IsPass = true
|
||||
analyzeResult.Message = outcome.Pass.Message
|
||||
analyzeResult.URI = outcome.Pass.URI
|
||||
|
||||
return analyzeResult, nil
|
||||
}
|
||||
}
|
||||
@@ -195,21 +209,33 @@ func compareCephStatus(actual, when string) (bool, error) {
|
||||
}
|
||||
}
|
||||
|
||||
func detailedCephMessage(msg string, status CephStatus) string {
|
||||
func detailedCephMessage(outcomeMessage string, status CephStatus) string {
|
||||
var msg = []string{}
|
||||
|
||||
if outcomeMessage != "" {
|
||||
msg = append(msg, outcomeMessage)
|
||||
}
|
||||
|
||||
if status.OsdMap.OsdMap.NumOsd > 0 {
|
||||
msg = fmt.Sprintf("%s. %v/%v OSDs up", msg, status.OsdMap.OsdMap.NumUpOsd, status.OsdMap.OsdMap.NumOsd)
|
||||
msg = append(msg, fmt.Sprintf("%v/%v OSDs up", status.OsdMap.OsdMap.NumUpOsd, status.OsdMap.OsdMap.NumOsd))
|
||||
}
|
||||
|
||||
if status.OsdMap.OsdMap.Full {
|
||||
msg = fmt.Sprintf("%s. OSD disk is full", msg)
|
||||
msg = append(msg, fmt.Sprintf("OSD disk is full"))
|
||||
} else if status.OsdMap.OsdMap.NearFull {
|
||||
msg = fmt.Sprintf("%s. OSD disk is nearly full", msg)
|
||||
msg = append(msg, fmt.Sprintf("OSD disk is nearly full"))
|
||||
}
|
||||
|
||||
if status.PgMap.TotalBytes > 0 {
|
||||
pgUsage := 100 * float64(status.PgMap.UsedBytes) / float64(status.PgMap.TotalBytes)
|
||||
msg = fmt.Sprintf("%s. PG storage usage is %.1f%%.", msg, pgUsage)
|
||||
msg = append(msg, fmt.Sprintf("PG storage usage is %.1f%%", pgUsage))
|
||||
}
|
||||
|
||||
return msg
|
||||
if status.Health.Checks != nil {
|
||||
for k, v := range status.Health.Checks {
|
||||
msg = append(msg, fmt.Sprintf("%s: %s", k, v.Summary.Message))
|
||||
}
|
||||
}
|
||||
|
||||
return strings.Join(msg, "\n")
|
||||
}
|
||||
|
||||
@@ -56,7 +56,7 @@ func Test_cephStatus(t *testing.T) {
|
||||
IsWarn: true,
|
||||
IsFail: false,
|
||||
Title: "Ceph Status",
|
||||
Message: "Ceph status is HEALTH_WARN. 5/5 OSDs up. OSD disk is nearly full. PG storage usage is 85.0%.",
|
||||
Message: "Ceph status is HEALTH_WARN\n5/5 OSDs up\nOSD disk is nearly full\nPG storage usage is 85.0%",
|
||||
URI: "https://rook.io/docs/rook/v1.4/ceph-common-issues.html",
|
||||
IconKey: "rook",
|
||||
IconURI: "https://troubleshoot.sh/images/analyzer-icons/rook.svg?w=11&h=16",
|
||||
@@ -89,7 +89,7 @@ func Test_cephStatus(t *testing.T) {
|
||||
IsWarn: false,
|
||||
IsFail: true,
|
||||
Title: "Ceph Status",
|
||||
Message: "Ceph status is HEALTH_ERR. 4/5 OSDs up. OSD disk is full. PG storage usage is 95.0%.",
|
||||
Message: "Ceph status is HEALTH_ERR\n4/5 OSDs up\nOSD disk is full\nPG storage usage is 95.0%",
|
||||
URI: "https://rook.io/docs/rook/v1.4/ceph-common-issues.html",
|
||||
IconKey: "rook",
|
||||
IconURI: "https://troubleshoot.sh/images/analyzer-icons/rook.svg?w=11&h=16",
|
||||
@@ -172,7 +172,7 @@ func Test_cephStatus(t *testing.T) {
|
||||
IsWarn: false,
|
||||
IsFail: true,
|
||||
Title: "Ceph Status",
|
||||
Message: "custom message WARN. 5/5 OSDs up. OSD disk is nearly full. PG storage usage is 85.0%.",
|
||||
Message: "custom message WARN\n5/5 OSDs up\nOSD disk is nearly full\nPG storage usage is 85.0%",
|
||||
URI: "custom uri WARN",
|
||||
IconKey: "rook",
|
||||
IconURI: "https://troubleshoot.sh/images/analyzer-icons/rook.svg?w=11&h=16",
|
||||
@@ -218,6 +218,43 @@ func Test_cephStatus(t *testing.T) {
|
||||
}
|
||||
}`,
|
||||
},
|
||||
{
|
||||
name: "warn case with multiple health status messages",
|
||||
analyzer: troubleshootv1beta2.CephStatusAnalyze{},
|
||||
expectResult: AnalyzeResult{
|
||||
IsPass: false,
|
||||
IsWarn: true,
|
||||
IsFail: false,
|
||||
Title: "Ceph Status",
|
||||
Message: "Ceph status is HEALTH_WARN\nPOOL_NO_REDUNDANCY: 11 pool(s) have no replicas configured\nPOOL_PG_NUM_NOT_POWER_OF_TWO: 8 pool(s) have non-power-of-two pg_num",
|
||||
URI: "https://rook.io/docs/rook/v1.4/ceph-common-issues.html",
|
||||
IconKey: "rook",
|
||||
IconURI: "https://troubleshoot.sh/images/analyzer-icons/rook.svg?w=11&h=16",
|
||||
},
|
||||
filePath: "ceph/status.json",
|
||||
file: `{
|
||||
"fsid": "96a8178c-6aa2-4adf-a309-9e8869a79611",
|
||||
"health": {
|
||||
"status": "HEALTH_WARN",
|
||||
"checks": {
|
||||
"POOL_NO_REDUNDANCY": {
|
||||
"severity": "HEALTH_WARN",
|
||||
"summary": {
|
||||
"message": "11 pool(s) have no replicas configured",
|
||||
"count": 11
|
||||
},
|
||||
"muted": false
|
||||
},
|
||||
"POOL_PG_NUM_NOT_POWER_OF_TWO": {
|
||||
"severity": "HEALTH_WARN",
|
||||
"summary": {
|
||||
"message": "8 pool(s) have non-power-of-two pg_num"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}`,
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
|
||||
Reference in New Issue
Block a user