Merge pull request #575 from replicatedhq/diamonwiggins/sc-40946/ceph-analyzer-health-status

Ceph health status messages in Analyzer result
This commit is contained in:
Diamon Wiggins
2022-05-13 20:05:23 -04:00
committed by GitHub
2 changed files with 73 additions and 10 deletions

View File

@@ -74,7 +74,17 @@ type CephStatus struct {
}
type HealthStatus struct {
Status string `json:"status"`
Status string `json:"status"`
Checks map[string]CheckMessage `json:"checks"`
}
// CheckMessage is one entry of HealthStatus.Checks: a single health check
// with its severity and summary. Only the "severity" and "summary" JSON keys
// are decoded; any other keys on the entry are ignored.
type CheckMessage struct {
// Severity is decoded from the "severity" key of the check entry.
Severity string `json:"severity"`
// Summary carries the human-readable text for the check.
Summary Summary `json:"summary"`
}
// Summary is the "summary" object of a health check. Only its "message" key
// is decoded.
type Summary struct {
// Message is the text that detailedCephMessage appends after the check name.
Message string `json:"message"`
}
type OsdMap struct {
@@ -121,6 +131,7 @@ func cephStatus(analyzer *troubleshootv1beta2.CephStatusAnalyze, getCollectedFil
if outcome.Fail.When == "" {
outcome.Fail.When = string(CephHealthErr)
}
match, err := compareCephStatus(status.Health.Status, outcome.Fail.When)
if err != nil {
return nil, errors.Wrap(err, "failed to compare ceph status")
@@ -134,6 +145,7 @@ func cephStatus(analyzer *troubleshootv1beta2.CephStatusAnalyze, getCollectedFil
if outcome.Warn.When == "" {
outcome.Warn.When = string(CephHealthWarn)
}
match, err := compareCephStatus(status.Health.Status, outcome.Warn.When)
if err != nil {
return nil, errors.Wrap(err, "failed to compare ceph status")
@@ -147,6 +159,7 @@ func cephStatus(analyzer *troubleshootv1beta2.CephStatusAnalyze, getCollectedFil
if outcome.Pass.When == "" {
outcome.Pass.When = string(CephHealthOK)
}
match, err := compareCephStatus(status.Health.Status, outcome.Pass.When)
if err != nil {
return nil, errors.Wrap(err, "failed to compare ceph status")
@@ -154,6 +167,7 @@ func cephStatus(analyzer *troubleshootv1beta2.CephStatusAnalyze, getCollectedFil
analyzeResult.IsPass = true
analyzeResult.Message = outcome.Pass.Message
analyzeResult.URI = outcome.Pass.URI
return analyzeResult, nil
}
}
@@ -195,21 +209,33 @@ func compareCephStatus(actual, when string) (bool, error) {
}
}
func detailedCephMessage(msg string, status CephStatus) string {
func detailedCephMessage(outcomeMessage string, status CephStatus) string {
var msg = []string{}
if outcomeMessage != "" {
msg = append(msg, outcomeMessage)
}
if status.OsdMap.OsdMap.NumOsd > 0 {
msg = fmt.Sprintf("%s. %v/%v OSDs up", msg, status.OsdMap.OsdMap.NumUpOsd, status.OsdMap.OsdMap.NumOsd)
msg = append(msg, fmt.Sprintf("%v/%v OSDs up", status.OsdMap.OsdMap.NumUpOsd, status.OsdMap.OsdMap.NumOsd))
}
if status.OsdMap.OsdMap.Full {
msg = fmt.Sprintf("%s. OSD disk is full", msg)
msg = append(msg, fmt.Sprintf("OSD disk is full"))
} else if status.OsdMap.OsdMap.NearFull {
msg = fmt.Sprintf("%s. OSD disk is nearly full", msg)
msg = append(msg, fmt.Sprintf("OSD disk is nearly full"))
}
if status.PgMap.TotalBytes > 0 {
pgUsage := 100 * float64(status.PgMap.UsedBytes) / float64(status.PgMap.TotalBytes)
msg = fmt.Sprintf("%s. PG storage usage is %.1f%%.", msg, pgUsage)
msg = append(msg, fmt.Sprintf("PG storage usage is %.1f%%", pgUsage))
}
return msg
if status.Health.Checks != nil {
for k, v := range status.Health.Checks {
msg = append(msg, fmt.Sprintf("%s: %s", k, v.Summary.Message))
}
}
return strings.Join(msg, "\n")
}

View File

@@ -56,7 +56,7 @@ func Test_cephStatus(t *testing.T) {
IsWarn: true,
IsFail: false,
Title: "Ceph Status",
Message: "Ceph status is HEALTH_WARN. 5/5 OSDs up. OSD disk is nearly full. PG storage usage is 85.0%.",
Message: "Ceph status is HEALTH_WARN\n5/5 OSDs up\nOSD disk is nearly full\nPG storage usage is 85.0%",
URI: "https://rook.io/docs/rook/v1.4/ceph-common-issues.html",
IconKey: "rook",
IconURI: "https://troubleshoot.sh/images/analyzer-icons/rook.svg?w=11&h=16",
@@ -89,7 +89,7 @@ func Test_cephStatus(t *testing.T) {
IsWarn: false,
IsFail: true,
Title: "Ceph Status",
Message: "Ceph status is HEALTH_ERR. 4/5 OSDs up. OSD disk is full. PG storage usage is 95.0%.",
Message: "Ceph status is HEALTH_ERR\n4/5 OSDs up\nOSD disk is full\nPG storage usage is 95.0%",
URI: "https://rook.io/docs/rook/v1.4/ceph-common-issues.html",
IconKey: "rook",
IconURI: "https://troubleshoot.sh/images/analyzer-icons/rook.svg?w=11&h=16",
@@ -172,7 +172,7 @@ func Test_cephStatus(t *testing.T) {
IsWarn: false,
IsFail: true,
Title: "Ceph Status",
Message: "custom message WARN. 5/5 OSDs up. OSD disk is nearly full. PG storage usage is 85.0%.",
Message: "custom message WARN\n5/5 OSDs up\nOSD disk is nearly full\nPG storage usage is 85.0%",
URI: "custom uri WARN",
IconKey: "rook",
IconURI: "https://troubleshoot.sh/images/analyzer-icons/rook.svg?w=11&h=16",
@@ -218,6 +218,43 @@ func Test_cephStatus(t *testing.T) {
}
}`,
},
{
name: "warn case with multiple health status messages",
analyzer: troubleshootv1beta2.CephStatusAnalyze{},
expectResult: AnalyzeResult{
IsPass: false,
IsWarn: true,
IsFail: false,
Title: "Ceph Status",
Message: "Ceph status is HEALTH_WARN\nPOOL_NO_REDUNDANCY: 11 pool(s) have no replicas configured\nPOOL_PG_NUM_NOT_POWER_OF_TWO: 8 pool(s) have non-power-of-two pg_num",
URI: "https://rook.io/docs/rook/v1.4/ceph-common-issues.html",
IconKey: "rook",
IconURI: "https://troubleshoot.sh/images/analyzer-icons/rook.svg?w=11&h=16",
},
filePath: "ceph/status.json",
file: `{
"fsid": "96a8178c-6aa2-4adf-a309-9e8869a79611",
"health": {
"status": "HEALTH_WARN",
"checks": {
"POOL_NO_REDUNDANCY": {
"severity": "HEALTH_WARN",
"summary": {
"message": "11 pool(s) have no replicas configured",
"count": 11
},
"muted": false
},
"POOL_PG_NUM_NOT_POWER_OF_TWO": {
"severity": "HEALTH_WARN",
"summary": {
"message": "8 pool(s) have non-power-of-two pg_num"
}
}
}
}
}`,
},
}
for _, test := range tests {