Mirror of https://github.com/kubereboot/kured.git, synced 2026-02-15 01:39:50 +00:00
Compare commits: force-rebo...1.14.0 (890 commits)
Commit range: 0df1059d66 (newest) … 11780f008a (oldest)
(deleted file, -20 lines)
@@ -1,20 +0,0 @@
version: 2
jobs:
  build:
    working_directory: /go/src/github.com/weaveworks/kured
    docker:
      - image: circleci/golang:1.8
    steps:
      - checkout
      - setup_remote_docker
      - run: go get github.com/golang/dep/cmd/dep
      - run: dep ensure
      - run: make

      - deploy:
          name: Maybe push master images
          command: |
            if [ -z "${CIRCLE_TAG}" -a "${CIRCLE_BRANCH}" == "master" ]; then
              docker login -u "$DOCKER_USER" -p "$DOCKER_PASS" quay.io
              make publish-image
            fi
.clomonitor.yml (new file, +3)
@@ -0,0 +1,3 @@
exemptions:
  - check: analytics
    reason: "We don't track people"
.github/dependabot.yml (vendored, new file, +21)
@@ -0,0 +1,21 @@
version: 2
updates:
  # Maintain dependencies for GitHub Actions
  - package-ecosystem: "github-actions"
    directory: "/"
    schedule:
      interval: "daily"
  # Maintain dependencies for gomod
  - package-ecosystem: "gomod"
    directory: "/"
    schedule:
      interval: "daily"
    ignore:
      - dependency-name: "k8s.io/api"
      - dependency-name: "k8s.io/apimachinery"
      - dependency-name: "k8s.io/client-go"
      - dependency-name: "k8s.io/kubectl"
  - package-ecosystem: "docker"
    directory: "/"
    schedule:
      interval: "daily"
.github/kind-cluster-1.26.yaml (vendored, new file, +13)
@@ -0,0 +1,13 @@
kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
nodes:
- role: control-plane
  image: "kindest/node:v1.26.6"
- role: control-plane
  image: "kindest/node:v1.26.6"
- role: control-plane
  image: "kindest/node:v1.26.6"
- role: worker
  image: "kindest/node:v1.26.6"
- role: worker
  image: "kindest/node:v1.26.6"
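These kind topologies (three control-plane nodes, two workers) back the e2e jobs in the workflows below, and the CONTRIBUTING.md added later in this diff uses them for local testing too. A minimal sketch of bringing one up locally, assuming kind and kubectl are installed:

```console
kind create cluster --config .github/kind-cluster-1.26.yaml
kubectl get nodes   # should list 3 control-plane and 2 worker nodes
```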
.github/kind-cluster-1.27.yaml (vendored, new file, +13)
@@ -0,0 +1,13 @@
kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
nodes:
- role: control-plane
  image: "kindest/node:v1.27.3"
- role: control-plane
  image: "kindest/node:v1.27.3"
- role: control-plane
  image: "kindest/node:v1.27.3"
- role: worker
  image: "kindest/node:v1.27.3"
- role: worker
  image: "kindest/node:v1.27.3"
.github/kind-cluster-1.28.yaml (vendored, new file, +13)
@@ -0,0 +1,13 @@
kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
nodes:
- role: control-plane
  image: "kindest/node:v1.28.0"
- role: control-plane
  image: "kindest/node:v1.28.0"
- role: control-plane
  image: "kindest/node:v1.28.0"
- role: worker
  image: "kindest/node:v1.28.0"
- role: worker
  image: "kindest/node:v1.28.0"
.github/scripts/goreleaser-install.sh (vendored, new file, +28)
@@ -0,0 +1,28 @@
#!/bin/sh
set -e

RELEASES_URL="https://github.com/goreleaser/goreleaser/releases"
FILE_BASENAME="goreleaser"

test -z "$VERSION" && {
    echo "Unable to get goreleaser version." >&2
    exit 1
}

test -z "$TMPDIR" && TMPDIR="$(mktemp -d)"
TAR_FILE="$TMPDIR/${FILE_BASENAME}_$(uname -s)_$(uname -m).tar.gz"
export TAR_FILE

(
    echo "Downloading GoReleaser $VERSION..."
    curl -sfLo "$TAR_FILE" \
        "$RELEASES_URL/download/$VERSION/${FILE_BASENAME}_$(uname -s)_$(uname -m).tar.gz"
    cd "$TMPDIR"
    curl -sfLo "checksums.txt" "$RELEASES_URL/download/$VERSION/checksums.txt"
    echo "Verifying checksums..."
    sha256sum --ignore-missing --quiet --check checksums.txt
)

tar -xf "$TAR_FILE" -O goreleaser > "$TMPDIR/goreleaser"
rm "$TMPDIR/checksums.txt"
rm "$TAR_FILE"
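The script requires `VERSION` to be set, honours an existing `TMPDIR`, and leaves the extracted binary at `$TMPDIR/goreleaser`. A usage sketch — the version number is only an example, and `chmod` is needed because the script writes the binary via shell redirection rather than extracting it with its mode bits:

```console
mkdir -p .tmp
VERSION=v1.20.0 TMPDIR=.tmp sh .github/scripts/goreleaser-install.sh
chmod +x .tmp/goreleaser
.tmp/goreleaser --version
```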
.github/workflows/codeql.yml (vendored, new file, +75)
@@ -0,0 +1,75 @@
# For most projects, this workflow file will not need changing; you simply need
# to commit it to your repository.
#
# You may wish to alter this file to override the set of languages analyzed,
# or to provide custom queries or build logic.
#
# ******** NOTE ********
# We have attempted to detect the languages in your repository. Please check
# the `language` matrix defined below to confirm you have the correct set of
# supported CodeQL languages.
#
name: "CodeQL"

on:
  workflow_dispatch:
  push:
    branches: [ "main" ]
  pull_request:
    # The branches below must be a subset of the branches above
    branches: [ "main" ]
  schedule:
    - cron: '24 13 * * 3'

jobs:
  analyze:
    name: Analyze
    runs-on: ubuntu-latest
    permissions:
      actions: read
      contents: read
      security-events: write

    strategy:
      fail-fast: false
      matrix:
        language: [ 'go' ]
        # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ]
        # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support

    steps:
      - name: Checkout repository
        uses: actions/checkout@v3

      # Initializes the CodeQL tools for scanning.
      - name: Initialize CodeQL
        uses: github/codeql-action/init@v2
        with:
          languages: ${{ matrix.language }}
          # If you wish to specify custom queries, you can do so here or in a config file.
          # By default, queries listed here will override any specified in a config file.
          # Prefix the list here with "+" to use these queries and those in the config file.

          # Details on CodeQL's query packs refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs
          # queries: security-extended,security-and-quality

      # Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
      # If this step fails, then you should remove it and run the build manually (see below)
      - name: Autobuild
        uses: github/codeql-action/autobuild@v2

      # ℹ️ Command-line programs to run using the OS shell.
      # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun

      # If the Autobuild fails above, remove it and uncomment the following three lines.
      # modify them (or add more) to build your code if your project, please refer to the EXAMPLE below for guidance.

      # - run: |
      #     echo "Run, Build Application using script"
      #     ./location_of_script_within_repo/buildscript.sh

      - name: Perform CodeQL Analysis
        uses: github/codeql-action/analyze@v2
        with:
          category: "/language:${{matrix.language}}"
.github/workflows/on-main-push.yaml (vendored, new file, +82)
@@ -0,0 +1,82 @@
# We publish every merged commit in the form of an image
# named kured:<branch>-<short tag>
name: Push image of latest main
on:
  push:
    branches:
      - main

env:
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}

jobs:
  tag-scan-and-push-final-image:
    name: "Build, scan, and publish tagged image"
    runs-on: ubuntu-latest
    permissions:
      id-token: write
      contents: write
      packages: write
    steps:
      - uses: actions/checkout@v3

      - name: Ensure go version
        uses: actions/setup-go@v4
        with:
          go-version-file: 'go.mod'
          check-latest: true

      - name: Login to ghcr.io
        uses: docker/login-action@v2
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Extract metadata (tags, labels) for Docker
        id: meta
        uses: docker/metadata-action@818d4b7b91585d195f67373fd9cb0332e31a7175
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}

      - name: Set up QEMU
        uses: docker/setup-qemu-action@v2

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v2

      - name: Find current tag version
        run: echo "sha_short=$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT
        id: tags

      - name: Setup GoReleaser
        run: make bootstrap-tools

      - name: Build binaries
        run: make kured-release-snapshot
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

      - name: Build image
        uses: docker/build-push-action@v4
        with:
          context: .
          platforms: linux/arm64, linux/amd64, linux/arm/v7, linux/arm/v6, linux/386
          push: true
          labels: ${{ steps.meta.outputs.labels }}
          tags: |
            ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.tags.outputs.sha_short }}

      - name: Generate SBOM
        run: |
          .tmp/syft ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.tags.outputs.sha_short }} -o spdx > kured.sbom

      - name: Sign and attest artifacts
        run: |
          .tmp/cosign sign -y -r ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.tags.outputs.sha_short }}

          .tmp/cosign sign-blob -y --output-signature kured.sbom.sig --output-certificate kured.sbom.pem kured.sbom

          .tmp/cosign attest -y --type spdx --predicate kured.sbom ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.tags.outputs.sha_short }}
          .tmp/cosign attach sbom --type spdx --sbom kured.sbom ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.tags.outputs.sha_short }}
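Because the workflow signs images keylessly (GitHub OIDC via `id-token: write`), consumers can verify a published image before deploying it. A verification sketch, assuming cosign v2.x; the tag and the identity regexp are assumptions you should adapt to the actual workflow identity:

```console
cosign verify ghcr.io/kubereboot/kured:<sha_short> \
  --certificate-oidc-issuer https://token.actions.githubusercontent.com \
  --certificate-identity-regexp 'https://github.com/kubereboot/kured/\.github/workflows/.*'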
.github/workflows/on-pr.yaml (vendored, new file, +268)
@@ -0,0 +1,268 @@
name: PR
on:
  pull_request:
  push:

jobs:
  pr-gotest:
    name: Run go tests
    runs-on: ubuntu-latest
    steps:
      - name: checkout
        uses: actions/checkout@v3
      - name: Ensure go version
        uses: actions/setup-go@v4
        with:
          go-version-file: 'go.mod'
          check-latest: true
      - name: run tests
        run: go test -json ./... > test.json
      - name: Annotate tests
        if: always()
        uses: guyarb/golang-test-annotations@v0.7.0
        with:
          test-results: test.json

  pr-shellcheck:
    name: Lint bash code with shellcheck
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - name: Run ShellCheck
        uses: bewuethr/shellcheck-action@v2

  pr-lint-code:
    name: Lint golang code
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - name: Ensure go version
        uses: actions/setup-go@v4
        with:
          go-version-file: 'go.mod'
          check-latest: true
      - name: Lint cmd folder
        uses: Jerome1337/golint-action@v1.0.3
        with:
          golint-path: './cmd/...'
      - name: Lint pkg folder
        uses: Jerome1337/golint-action@v1.0.3
        with:
          golint-path: './pkg/...'

  pr-check-docs-links:
    name: Check docs for incorrect links
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - name: Link Checker
        uses: lycheeverse/lychee-action@ec3ed119d4f44ad2673a7232460dc7dff59d2421
        env:
          GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
        with:
          args: --verbose --no-progress '*.md' '*.yaml' '*/*/*.go' --exclude-link-local
          fail: true

  # This should not be made a mandatory test
  # It is only used to make us aware of any potential security failure, that
  # should trigger a bump of the image in build/.
  pr-vuln-scan:
    name: Build image and scan it against known vulnerabilities
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - name: Ensure go version
        uses: actions/setup-go@v4
        with:
          go-version-file: 'go.mod'
          check-latest: true
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v2
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v2
      - name: Setup GoReleaser
        run: make bootstrap-tools
      - name: Find current tag version
        run: echo "sha_short=$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT
        id: tags
      - name: Build image
        run: VERSION="${{ steps.tags.outputs.sha_short }}" make image
      - name: Run Trivy vulnerability scanner
        uses: aquasecurity/trivy-action@41f05d9ecffa2ed3f1580af306000f734b733e54
        with:
          image-ref: 'ghcr.io/${{ github.repository }}:${{ steps.tags.outputs.sha_short }}'
          format: 'table'
          exit-code: '1'
          ignore-unfixed: true
          vuln-type: 'os,library'
          severity: 'CRITICAL,HIGH'

  # This ensures the latest code works with the manifests built from tree.
  # It is useful for two things:
  # - Test manifests changes (obviously), ensuring they don't break existing clusters
  # - Ensure manifests work with the latest versions even with no manifest change
  #   (compared to helm charts, manifests cannot easily template changes based on versions)
  # Helm charts are _trailing_ releases, while manifests are done during development.
  e2e-manifests:
    name: End-to-End test with kured with code and manifests from HEAD
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        kubernetes:
          - "1.26"
          - "1.27"
          - "1.28"
    steps:
      - uses: actions/checkout@v3
      - name: Ensure go version
        uses: actions/setup-go@v4
        with:
          go-version-file: 'go.mod'
          check-latest: true
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v2
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v2
      - name: Setup GoReleaser
        run: make bootstrap-tools
      - name: Find current tag version
        run: echo "sha_short=$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT
        id: tags
      - name: Build artifacts
        run: |
          VERSION="${{ steps.tags.outputs.sha_short }}" make image
          VERSION="${{ steps.tags.outputs.sha_short }}" make manifest

      - name: Workaround "Failed to attach 1 to compat systemd cgroup /actions_job/..." on gh actions
        run: |
          sudo bash << EOF
          cp /etc/docker/daemon.json /etc/docker/daemon.json.old
          echo '{}' > /etc/docker/daemon.json
          systemctl restart docker || journalctl --no-pager -n 500
          systemctl status docker
          EOF

      # Default name for helm/kind-action kind clusters is "chart-testing"
      - name: Create kind cluster with 5 nodes
        uses: helm/kind-action@v1.8.0
        with:
          config: .github/kind-cluster-${{ matrix.kubernetes }}.yaml
          version: v0.14.0

      - name: Preload previously built images onto kind cluster
        run: kind load docker-image ghcr.io/${{ github.repository }}:${{ steps.tags.outputs.sha_short }} --name chart-testing

      - name: Do not wait for an hour before detecting the rebootSentinel
        run: |
          sed -i 's/#\(.*\)--period=1h/\1--period=30s/g' kured-ds.yaml

      - name: Install kured with kubectl
        run: |
          kubectl apply -f kured-rbac.yaml && kubectl apply -f kured-ds.yaml

      - name: Ensure kured is ready
        uses: nick-invision/retry@v2.8.3
        with:
          timeout_minutes: 10
          max_attempts: 10
          retry_wait_seconds: 60
          # DESIRED CURRENT READY UP-TO-DATE AVAILABLE should all be = to cluster_size
          command: "kubectl get ds -n kube-system kured | grep -E 'kured.*5.*5.*5.*5.*5'"

      - name: Create reboot sentinel files
        run: |
          ./tests/kind/create-reboot-sentinels.sh

      - name: Follow reboot until success
        env:
          DEBUG: true
        run: |
          ./tests/kind/follow-coordinated-reboot.sh

  # This ensures the latest code works with the manifests built from tree.
  # It is useful for two things:
  # - Test manifests changes (obviously), ensuring they don't break existing clusters
  # - Ensure manifests work with the latest versions even with no manifest change
  #   (compared to helm charts, manifests cannot easily template changes based on versions)
  # Helm charts are _trailing_ releases, while manifests are done during development.
  # Concurrency = 2
  e2e-manifests-concurent:
    name: End-to-End test with kured with code and manifests from HEAD (concurrent)
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        kubernetes:
          - "1.26"
          - "1.27"
          - "1.28"
    steps:
      - uses: actions/checkout@v3
      - name: Ensure go version
        uses: actions/setup-go@v4
        with:
          go-version-file: 'go.mod'
          check-latest: true
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v2
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v2
      - name: Setup GoReleaser
        run: make bootstrap-tools
      - name: Find current tag version
        run: echo "sha_short=$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT
        id: tags
      - name: Build artifacts
        run: |
          VERSION="${{ steps.tags.outputs.sha_short }}" make image
          VERSION="${{ steps.tags.outputs.sha_short }}" make manifest

      - name: Workaround "Failed to attach 1 to compat systemd cgroup /actions_job/..." on gh actions
        run: |
          sudo bash << EOF
          cp /etc/docker/daemon.json /etc/docker/daemon.json.old
          echo '{}' > /etc/docker/daemon.json
          systemctl restart docker || journalctl --no-pager -n 500
          systemctl status docker
          EOF

      # Default name for helm/kind-action kind clusters is "chart-testing"
      - name: Create kind cluster with 5 nodes
        uses: helm/kind-action@v1.8.0
        with:
          config: .github/kind-cluster-${{ matrix.kubernetes }}.yaml
          version: v0.14.0

      - name: Preload previously built images onto kind cluster
        run: kind load docker-image ghcr.io/${{ github.repository }}:${{ steps.tags.outputs.sha_short }} --name chart-testing

      - name: Do not wait for an hour before detecting the rebootSentinel
        run: |
          sed -i 's/#\(.*\)--period=1h/\1--period=30s/g' kured-ds.yaml
          sed -i 's/#\(.*\)--concurrency=1/\1--concurrency=2/g' kured-ds.yaml

      - name: Install kured with kubectl
        run: |
          kubectl apply -f kured-rbac.yaml && kubectl apply -f kured-ds.yaml

      - name: Ensure kured is ready
        uses: nick-invision/retry@v2.8.3
        with:
          timeout_minutes: 10
          max_attempts: 10
          retry_wait_seconds: 60
          # DESIRED CURRENT READY UP-TO-DATE AVAILABLE should all be = to cluster_size
          command: "kubectl get ds -n kube-system kured | grep -E 'kured.*5.*5.*5.*5.*5'"

      - name: Create reboot sentinel files
        run: |
          ./tests/kind/create-reboot-sentinels.sh

      - name: Follow reboot until success
        env:
          DEBUG: true
        run: |
          ./tests/kind/follow-coordinated-reboot.sh
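The `sed` steps work because `kured-ds.yaml` ships its tuning flags commented out: the pattern drops the leading `#` while rewriting the flag's value, which activates it. A sketch of the transformation — the input line here is illustrative, not copied from the manifest:

```sh
echo '            # - --period=1h' | sed 's/#\(.*\)--period=1h/\1--period=30s/g'
# prints:             - --period=30s
```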
.github/workflows/on-tag.yaml (vendored, new file, +97)
@@ -0,0 +1,97 @@
# when we add a tag to the repo, we should publish the kured image to a public repository
# if it's safe.
# It doesn't mean it's ready for release, but at least it's getting us started.
# The next step is to have a PR with the helm chart, to bump the version of the image used
name: Tag repo
on:
  push:
    tags:
      - "*"

env:
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}

jobs:
  tag-scan-and-push-final-image:
    name: "Build, scan, and publish tagged image"
    runs-on: ubuntu-latest
    permissions:
      id-token: write
      contents: write
      packages: write
    steps:
      - uses: actions/checkout@v3
      - name: Ensure go version
        uses: actions/setup-go@v4
        with:
          go-version-file: 'go.mod'
          check-latest: true
      - name: Find current tag version
        run: echo "version=${GITHUB_REF#refs/tags/}" >> $GITHUB_OUTPUT
        id: tags
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v2
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v2
      - name: Setup GoReleaser
        run: make bootstrap-tools
      - name: Build binaries
        run: make kured-release-tag
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      - name: Build single image for scan
        uses: docker/build-push-action@v4
        with:
          context: .
          platforms: linux/amd64
          push: false
          load: true
          tags: |
            ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.tags.outputs.version }}

      - name: Run Trivy vulnerability scanner
        uses: aquasecurity/trivy-action@41f05d9ecffa2ed3f1580af306000f734b733e54
        with:
          image-ref: '${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.tags.outputs.version }}'
          format: 'table'
          exit-code: '1'
          ignore-unfixed: true
          vuln-type: 'os,library'
          severity: 'CRITICAL,HIGH'

      - name: Login to ghcr.io
        uses: docker/login-action@v2
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Extract metadata (tags, labels) for Docker
        id: meta
        uses: docker/metadata-action@818d4b7b91585d195f67373fd9cb0332e31a7175
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}

      - name: Build release images
        uses: docker/build-push-action@v4
        with:
          context: .
          platforms: linux/arm64, linux/amd64, linux/arm/v7, linux/arm/v6, linux/386
          push: true
          labels: ${{ steps.meta.outputs.labels }}
          tags: |
            ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.tags.outputs.version }}

      - name: Generate SBOM
        run: |
          .tmp/syft ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.tags.outputs.version }} -o spdx > kured.sbom

      - name: Sign and attest artifacts
        run: |
          .tmp/cosign sign -y -r ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.tags.outputs.version }}

          .tmp/cosign sign-blob -y --output-signature kured.sbom.sig kured.sbom

          .tmp/cosign attest -y --type spdx --predicate kured.sbom ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.tags.outputs.version }}
          .tmp/cosign attach sbom --type spdx --sbom kured.sbom ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.tags.outputs.version }}
.github/workflows/periodics-daily.yaml (vendored, new file, +80)
@@ -0,0 +1,80 @@
name: Daily jobs

on:
  schedule:
    - cron: "30 1 * * *"

jobs:
  periodics-gotest:
    name: Run go tests
    runs-on: ubuntu-latest
    steps:
      - name: checkout
        uses: actions/checkout@v3
      - name: run tests
        run: go test -json ./... > test.json
      - name: Annotate tests
        if: always()
        uses: guyarb/golang-test-annotations@v0.7.0
        with:
          test-results: test.json

  periodics-mark-stale:
    name: Mark stale issues and PRs
    runs-on: ubuntu-latest
    steps:
      # Stale by default waits for 60 days before marking PR/issues as stale, and closes them after 21 days.
      # Do not expire the first issues that would allow the community to grow.
      - uses: actions/stale@v8
        with:
          repo-token: ${{ secrets.GITHUB_TOKEN }}
          stale-issue-message: 'This issue was automatically considered stale due to lack of activity. Please update it and/or join our slack channels to promote it, before it automatically closes (in 7 days).'
          stale-pr-message: 'This PR was automatically considered stale due to lack of activity. Please refresh it and/or join our slack channels to highlight it, before it automatically closes (in 7 days).'
          stale-issue-label: 'no-issue-activity'
          stale-pr-label: 'no-pr-activity'
          exempt-issue-labels: 'good first issue,keep'
          days-before-close: 21

  check-docs-links:
    name: Check docs for incorrect links
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - name: Link Checker
        uses: lycheeverse/lychee-action@ec3ed119d4f44ad2673a7232460dc7dff59d2421
        env:
          GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
        with:
          args: --verbose --no-progress '*.md' '*.yaml' '*/*/*.go' --exclude-link-local
          fail: true

  vuln-scan:
    name: Build image and scan it against known vulnerabilities
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - name: Ensure go version
        uses: actions/setup-go@v4
        with:
          go-version-file: 'go.mod'
          check-latest: true
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v2
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v2
      - name: Setup GoReleaser
        run: make bootstrap-tools
      - name: Find current tag version
        run: echo "sha_short=$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT
        id: tags
      - name: Build artifacts
        run: VERSION="${{ steps.tags.outputs.sha_short }}" make image
      - name: Run Trivy vulnerability scanner
        uses: aquasecurity/trivy-action@41f05d9ecffa2ed3f1580af306000f734b733e54
        with:
          image-ref: 'ghcr.io/${{ github.repository }}:${{ steps.tags.outputs.sha_short }}'
          format: 'table'
          exit-code: '1'
          ignore-unfixed: true
          vuln-type: 'os,library'
          severity: 'CRITICAL,HIGH'
.gitignore (vendored, 3 changes)
@@ -1,4 +1,5 @@
cmd/kured/kured
cmd/prom-active-alerts/prom-active-alerts
vendor
build
dist
.tmp
.goreleaser.yml (new file, +32)
@@ -0,0 +1,32 @@
project_name: kured
before:
  hooks:
    - go mod tidy
builds:
  - main: ./cmd/kured
    env:
      - CGO_ENABLED=0
    goos:
      - linux
    goarch:
      - amd64
      - arm64
      - arm
      - "386"
    goarm:
      - "6"
      - "7"
    ldflags:
      - -s -w -X main.version={{ if .IsSnapshot }}{{ .ShortCommit }}{{ else }}{{ .Version }}{{ end }}
    mod_timestamp: "{{ .CommitTimestamp }}"
    flags:
      - -trimpath

snapshot:
  name_template: "{{ .ShortCommit }}"

release:
  disable: true

changelog:
  skip: true
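GoReleaser writes one binary per target under `dist/`, and the Dockerfile later in this diff picks the right one with its `SUFFIX` case statement (`_v1` for amd64, `_6`/`_7` for the arm variants, nothing otherwise). Under these settings the layout should look roughly like this — a sketch, since exact directory names depend on the GoReleaser version:

```console
dist/kured_linux_amd64_v1/kured
dist/kured_linux_arm64/kured
dist/kured_linux_arm_6/kured
dist/kured_linux_arm_7/kured
dist/kured_linux_386/kured
```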
.lycheeignore (new file, +6)
@@ -0,0 +1,6 @@
app.fossa.com
cluster.local
hooks.slack.com
localhost
slack://
teams://
CODE_OF_CONDUCT.md (new file, +3)
@@ -0,0 +1,3 @@
## Kured Community Code of Conduct

Kured follows the [CNCF Code of Conduct](https://github.com/cncf/foundation/blob/main/code-of-conduct.md).
CONTRIBUTING.md (new file, +242)
@@ -0,0 +1,242 @@
# Developing `kured`

We love contributions to `kured`, no matter if you are [helping out on
Slack][slack], reporting or triaging [issues][issues] or contributing code
to `kured`.

In any case, it will make sense to familiarise yourself with the main
[README][readme] to understand the different features and options, which is
helpful for testing. The "building" section in particular makes sense if
you are planning to contribute code.

[slack]: README.md#getting-help
[issues]: https://github.com/kubereboot/kured/issues
[readme]: README.md

## Certificate of Origin

By contributing to this project you agree to the Developer Certificate of
Origin (DCO). This document was created by the Linux Kernel community and is a
simple statement that you, as a contributor, have the legal right to make the
contribution.

We require all commits to be signed. By signing off with your signature, you
certify that you wrote the patch or otherwise have the right to contribute the
material by the rules of the [DCO](DCO):

`Signed-off-by: Jane Doe <jane.doe@example.com>`

The signature must contain your real name
(sorry, no pseudonyms or anonymous contributions).
If your `user.name` and `user.email` are configured in your Git config,
you can sign your commit automatically with `git commit -s`.

## Kured Repositories

All Kured repositories are kept under <https://github.com/kubereboot>. To find the code and work on the individual pieces that make up Kured, here is our overview:

| Repositories                             | Contents                  |
| ---------------------------------------- | ------------------------- |
| <https://github.com/kubereboot/kured>    | Kured operator itself     |
| <https://github.com/kubereboot/charts>   | Helm chart                |
| <https://github.com/kubereboot/website>  | website and documentation |

## Regular development activities

### Prepare environment

Please run `make bootstrap-tools` once on a fresh repository clone to download several needed tools, e.g. GoReleaser.

### Updating k8s support

Whenever we want to update e.g. the `kubectl` or `client-go` dependencies,
some RBAC changes might be necessary too.

This is what it took to support Kubernetes 1.14:
<https://github.com/kubereboot/kured/pull/75>

Note that the process can be more involved, depending on the Kubernetes changes.
For example, k8s 1.10 changes to apps triggered the following commits:

b3f9ddf: Bump client-go for optimum k8s 1.10 compatibility
bc3f28d: Move deployment manifest to apps/v1
908998a: Update RBAC permissions for kubectl v1.10.3
efbb0c3: Document version compatibility in release notes
5731b98: Add warning to Dockerfile re: upgrading kubectl

Search the git log for inspiration for your cases.

Please update our `.github/workflows` with the new k8s images, starting with
the creation of a `.github/kind-cluster-<version>.yaml`, then updating
our workflows with the new versions.

Once you have updated everything, make sure you update the support matrix on
the main [README][readme] as well.

### Updating other dependencies

Dependabot proposes changes in our go.mod/go.sum.
Some of those changes are covered by CI testing, some are not.

Please make sure to test those not covered by CI (mostly the integration
with other tools) manually before merging.

### Review periodic jobs

We run periodic jobs (see also the Automated testing section of this documentation).
Those should be monitored for failures.

If a failure happens in the periodics, something terribly wrong must have happened
(or github is failing at the creation of a kind cluster). Please monitor those
failures carefully.

### Introducing new features

When you introduce a new feature, the kured team expects you to have tested
your change thoroughly. If possible, include all the necessary testing in your change.

If your change is user facing (a change to kured's flags, for example),
please expose your new feature in our default manifest (`kured-ds.yaml`),
as a comment.

Our release manifests and helm charts are our stable interfaces.
Any user facing changes will therefore have to wait for a release before being
exposed to our users.

This also means that when you expose a new feature, you should create another PR
for your changes in <https://github.com/kubereboot/charts> to make your feature
available at the next kured version for helm users.

In the charts PR, you can directly bump the appVersion to the next minor version
(you are introducing a new feature, which requires a bump of the minor number:
for example, if the current appVersion is 1.6.x, make sure you update your appVersion
to 1.7.0). This gives us an easy view of what lands in each release.

Do not hesitate to increase the test coverage for your feature, whether that is unit
testing or full functional testing (even using helm charts).

### Increasing test coverage

We welcome any change that increases our test coverage.
See also our github issues for the label `testing`.

## Automated testing

Our CI is covered by GitHub Actions.
You can see their contents in `.github/workflows`.

We currently run:

- go tests and lint
- `shellcheck`
- a check for dead links in our docs
- a security check against our base image (alpine)
- a deep functional test using our manifests on all supported k8s versions

To test your code manually, follow the section Manual testing.

## Manual (release) testing

Before `kured` is released, we want to make sure it still works fine on the
previous, current and next minor version of Kubernetes (with respect to the
`client-go` & `kubectl` dependencies in use). For local testing e.g.
`minikube` or `kind` can be sufficient. This will allow you to catch issues
that might not have been tested in our CI, like integration with other tools,
or your specific use case.

Deploy kured in your test scenario, make sure you pass the right `image`,
and update e.g. the `period` and `reboot-days` options so you get immediate
results if you log in to a node and run:

```console
sudo touch /var/run/reboot-required
```

### Example of golang testing

Please run `make test`. You should have `golint` installed.

### Example of testing with `minikube`

A test-run with `minikube` could look like this:

```console
# start minikube
minikube start --driver=kvm2 --kubernetes-version <k8s-release>

# build kured image and publish to registry accessible by minikube
make image minikube-publish

# edit kured-ds.yaml to
# - point to new image
# - change e.g. period and reboot-days option for immediate results

minikube kubectl -- apply -f kured-rbac.yaml
minikube kubectl -- apply -f kured-ds.yaml
minikube kubectl -- logs daemonset.apps/kured -n kube-system -f

# In separate terminal
minikube ssh
sudo touch /var/run/reboot-required
minikube logs -f
```

Now check for the 'Commanding reboot' message and minikube going down.

Unfortunately as of today, you are going to run into
<https://github.com/kubernetes/minikube/issues/2874>. This means that
minikube won't come back easily. You will need to start minikube again.
Then you can check for the lock release.

### Example of testing with `kind`

A test-run with `kind` could look like this:

```console
# create kind cluster
kind create cluster --config .github/kind-cluster-<k8s-version>.yaml

# create reboot required files on pre-defined kind nodes
./tests/kind/create-reboot-sentinels.sh

# check if reboot is working fine
./tests/kind/follow-coordinated-reboot.sh
```

## Publishing a new kured release

### Prepare Documentation

Check that `README.md` has an updated compatibility matrix and that the
url in the `kubectl` incantation (under "Installation") is updated to the
new version you want to release.

### Create a tag on the repo

Before going further, we should freeze the code for a release by tagging
the code. The GitHub Action should start a new job and push
the new image to the registry.

### Create the combined manifest

Now create the `kured-<release>-dockerhub.yaml` for e.g. `1.3.0`:

```sh
VERSION=1.3.0
MANIFEST="kured-$VERSION-dockerhub.yaml"
make DH_ORG="kubereboot" VERSION="${VERSION}" manifest
cat kured-rbac.yaml > "$MANIFEST"
cat kured-ds.yaml >> "$MANIFEST"
```

### Publish release artifacts

Now you can head to the Github UI, use the version number as tag and upload the
`kured-<release>-dockerhub.yaml` file.

Please describe what's new and noteworthy in the release notes, list the PRs
that landed and give a shout-out to everyone who contributed.

Please also note down which releases the upcoming `kured` release was
tested on. (Check old release notes if you're unsure.)
DCO (new file, +36)
@@ -0,0 +1,36 @@
Developer Certificate of Origin
Version 1.1

Copyright (C) 2004, 2006 The Linux Foundation and its contributors.
660 York Street, Suite 102,
San Francisco, CA 94110 USA

Everyone is permitted to copy and distribute verbatim copies of this
license document, but changing it is not allowed.


Developer's Certificate of Origin 1.1

By making a contribution to this project, I certify that:

(a) The contribution was created in whole or in part by me and I
    have the right to submit it under the open source license
    indicated in the file; or

(b) The contribution is based upon previous work that, to the best
    of my knowledge, is covered under an appropriate open source
    license and I have the right under that license to submit that
    work with modifications, whether created in whole or in part
    by me, under the same open source license (unless I am
    permitted to submit under a different license), as indicated
    in the file; or

(c) The contribution was provided directly to me by some other
    person who certified (a), (b) or (c) and I have not modified
    it.

(d) I understand and agree that this project and the contribution
    are public and that a record of the contribution (including all
    personal information I submit with it, including my sign-off) is
    maintained indefinitely and may be redistributed consistent with
    this project or the open source license(s) involved.
DEVELOPMENT.md (new file, 1 line)
@@ -0,0 +1 @@
This file was moved to [CONTRIBUTING.md](CONTRIBUTING.md).
Dockerfile (new file, 25 lines)
@@ -0,0 +1,25 @@
FROM --platform=$TARGETPLATFORM alpine:3.18.3 as bin

ARG TARGETOS
ARG TARGETARCH
ARG TARGETVARIANT

COPY dist/ /dist
RUN set -ex \
    && case "${TARGETARCH}" in \
        amd64) \
            SUFFIX="_v1" \
            ;; \
        arm) \
            SUFFIX="_${TARGETVARIANT:1}" \
            ;; \
        *) \
            SUFFIX="" \
            ;; \
    esac \
    && cp /dist/kured_${TARGETOS}_${TARGETARCH}${SUFFIX}/kured /dist/kured;

FROM --platform=$TARGETPLATFORM alpine:3.18.3
RUN apk update --no-cache && apk upgrade --no-cache && apk add --no-cache ca-certificates tzdata
COPY --from=bin /dist/kured /usr/bin/kured
ENTRYPOINT ["/usr/bin/kured"]
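The `dist/` directory consumed above is populated by goreleaser, whose per-platform output directories carry suffixes such as `_v1` for amd64 and `_6`/`_7` for arm variants; the `case` statement maps Docker's `TARGETARCH`/`TARGETVARIANT` onto those names. A sketch of driving this Dockerfile locally, mirroring the `image` target from the Makefile diff below (the tag is illustrative):

```console
make bootstrap-tools   # install goreleaser, syft and cosign into .tmp/
make kured-all         # populate dist/ for all platforms via goreleaser
docker buildx build --load -t kured:dev .
```
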
GOVERNANCE.md (new file, 112 lines)
@@ -0,0 +1,112 @@
# Project Governance

- [Values](#values)
- [Maintainers](#maintainers)
- [Becoming a Maintainer](#becoming-a-maintainer)
- [Meetings](#meetings)
- [Code of Conduct Enforcement](#code-of-conduct)
- [Voting](#voting)

## Values

The Kured project and its leadership embrace the following values:

- Openness: Communication and decision-making happens in the open and is discoverable for future
  reference. As much as possible, all discussions and work take place in public
  forums and open repositories.

- Fairness: All stakeholders have the opportunity to provide feedback and submit
  contributions, which will be considered on their merits.

- Community over Product or Company: Sustaining and growing our community takes
  priority over shipping code or sponsors' organizational goals. Each
  contributor participates in the project as an individual.

- Inclusivity: We innovate through different perspectives and skill sets, which
  can only be accomplished in a welcoming and respectful environment.

- Participation: Responsibilities within the project are earned through
  participation, and there is a clear path up the contributor ladder into leadership
  positions.

- Consensus: Whether or not wider input is required, the Kured community believes that
  the best decisions are reached through
  [consensus](https://en.wikipedia.org/wiki/Consensus_decision-making).

## Maintainers

Kured Maintainers have write access to the [project GitHub
organisation](https://github.com/kubereboot). They can merge their own patches or patches
from others. The current maintainers can be found in [MAINTAINERS][maintainers-file].
Maintainers collectively manage the project's resources and contributors.

This privilege is granted with some expectation of responsibility: maintainers
are people who care about the Kured project and want to help it grow and
improve. A maintainer is not just someone who can make changes, but someone who
has demonstrated their ability to collaborate with the team, get the most
knowledgeable people to review code and docs, contribute high-quality code, and
follow through to fix issues (in code or tests).

A maintainer is a contributor to the project's success and a citizen helping
the project succeed.

## Becoming a Maintainer

To become a Maintainer you need to demonstrate the following:

- commitment to the project:
  - participate in discussions, contributions, code and documentation reviews
    for 3 months or more and participate in Slack discussions and meetings
    if possible,
  - perform reviews for 5 non-trivial pull requests,
  - contribute 5 non-trivial pull requests and have them merged,
- ability to write quality code and/or documentation,
- ability to collaborate with the team,
- understanding of how the team works (policies, processes for testing and code review, etc.),
- understanding of the project's code base and coding and documentation style.

We realise that everybody brings different abilities and qualities to the team;
that's why we are willing to adapt these rules somewhat depending on the circumstances.

A new Maintainer can apply by proposing a PR to the [MAINTAINERS
file][maintainers-file]. A simple majority vote of existing Maintainers
approves the application.

Maintainers who are selected will be granted the necessary GitHub rights
and invited to the [private maintainer mailing list][private-list].

## Meetings

Time zones permitting, Maintainers are expected to participate in the public
developer meeting; details can be found [here][meeting-agenda].

Maintainers will also have closed meetings in order to discuss security reports
or Code of Conduct violations. Such meetings should be scheduled by any
Maintainer on receipt of a security issue or CoC report. All current Maintainers
must be invited to such closed meetings, except for any Maintainer who is
accused of a CoC violation.

## Code of Conduct

[Code of Conduct](./CODE_OF_CONDUCT.md) violations by community members will
be discussed and resolved on the [private Maintainer mailing list][private-list].
If the reported CoC violator is a Maintainer, the Maintainers will instead
designate two Maintainers to work with CNCF staff in resolving the report.

## Voting

While most business in Kured is conducted by "lazy consensus", periodically
the Maintainers may need to vote on specific actions or changes.
A vote can be taken in [kured issues labeled 'decision'][decision-issues] or on
[the private Maintainer mailing list][private-list] for security or conduct
matters. Votes may also be taken at [the developer meeting][meeting-agenda].
Any Maintainer may demand a vote be taken.

Most votes require a simple majority of all Maintainers to succeed. Maintainers
can be removed by a 2/3 majority vote of all Maintainers, and changes to this
Governance require a 2/3 vote of all Maintainers.

[maintainers-file]: ./MAINTAINERS
[private-list]: cncf-kured-maintainers@lists.cncf.io
[meeting-agenda]: https://docs.google.com/document/d/1AWT8YDdqZY-Se6Y1oAlwtujWLVpNVK2M_F_Vfqw06aI/edit
[decision-issues]: https://github.com/kubereboot/kured/labels/decision
Gopkg.lock (generated file, deleted, 431 lines)
@@ -1,431 +0,0 @@
# This file is autogenerated, do not edit; changes may be undone by the next 'dep ensure'.


[[projects]]
  name = "github.com/PuerkitoBio/purell"
  packages = ["."]
  revision = "0bcb03f4b4d0a9428594752bd2a3b9aa0a9d4bd4"
  version = "v1.1.0"

[[projects]]
  branch = "master"
  name = "github.com/PuerkitoBio/urlesc"
  packages = ["."]
  revision = "bbf7a2afc14f93e1e0a5c06df524fbd75e5031e5"

[[projects]]
  name = "github.com/asaskevich/govalidator"
  packages = ["."]
  revision = "73945b6115bfbbcc57d89b7316e28109364124e1"
  version = "v7"

[[projects]]
  branch = "master"
  name = "github.com/beorn7/perks"
  packages = ["quantile"]
  revision = "4c0e84591b9aa9e6dcfdf3e020114cd81f89d5f9"

[[projects]]
  branch = "master"
  name = "github.com/cloudfoundry-incubator/candiedyaml"
  packages = ["."]
  revision = "99c3df83b51532e3615f851d8c2dbb638f5313bf"

[[projects]]
  branch = "master"
  name = "github.com/davecgh/go-spew"
  packages = ["spew"]
  revision = "5215b55f46b2b919f50a1df0eaa5886afe4e3b3d"

[[projects]]
  branch = "master"
  name = "github.com/docker/distribution"
  packages = [
    "digest",
    "reference"
  ]
  revision = "7365003236ca58bd7fa17ef1459328d13301d7d5"

[[projects]]
  branch = "master"
  name = "github.com/emicklei/go-restful"
  packages = [
    ".",
    "log"
  ]
  revision = "b14c3a95fc27c52959d2eddc85066da3c14bf269"

[[projects]]
  name = "github.com/emicklei/go-restful-swagger12"
  packages = ["."]
  revision = "dcef7f55730566d41eae5db10e7d6981829720f6"
  version = "1.0.1"

[[projects]]
  branch = "master"
  name = "github.com/ghodss/yaml"
  packages = ["."]
  revision = "aa0c862057666179de291b67d9f093d12b5a8473"

[[projects]]
  branch = "master"
  name = "github.com/go-openapi/analysis"
  packages = ["."]
  revision = "8ed83f2ea9f00f945516462951a288eaa68bf0d6"

[[projects]]
  branch = "master"
  name = "github.com/go-openapi/errors"
  packages = ["."]
  revision = "03cfca65330da08a5a440053faf994a3c682b5bf"

[[projects]]
  branch = "master"
  name = "github.com/go-openapi/jsonpointer"
  packages = ["."]
  revision = "779f45308c19820f1a69e9a4cd965f496e0da10f"

[[projects]]
  branch = "master"
  name = "github.com/go-openapi/jsonreference"
  packages = ["."]
  revision = "36d33bfe519efae5632669801b180bf1a245da3b"

[[projects]]
  branch = "master"
  name = "github.com/go-openapi/loads"
  packages = ["."]
  revision = "a80dea3052f00e5f032e860dd7355cd0cc67e24d"

[[projects]]
  branch = "master"
  name = "github.com/go-openapi/spec"
  packages = ["."]
  revision = "e51c28f07047ad90caff03f6450908720d337e0c"

[[projects]]
  branch = "master"
  name = "github.com/go-openapi/strfmt"
  packages = ["."]
  revision = "610b6cacdcde6852f4de68998bd20ce1dac85b22"

[[projects]]
  branch = "master"
  name = "github.com/go-openapi/swag"
  packages = ["."]
  revision = "24ebf76d720bab64f62824d76bced3184a65490d"

[[projects]]
  branch = "master"
  name = "github.com/gogo/protobuf"
  packages = [
    "proto",
    "sortkeys"
  ]
  revision = "e33835a643a970c11ac74f6333f5f6866387a101"

[[projects]]
  branch = "master"
  name = "github.com/golang/glog"
  packages = ["."]
  revision = "23def4e6c14b4da8ac2ed8007337bc5eb5007998"

[[projects]]
  branch = "master"
  name = "github.com/golang/protobuf"
  packages = ["proto"]
  revision = "2bba0603135d7d7f5cb73b2125beeda19c09f4ef"

[[projects]]
  branch = "master"
  name = "github.com/google/gofuzz"
  packages = ["."]
  revision = "fd52762d25a41827db7ef64c43756fd4b9f7e382"

[[projects]]
  branch = "master"
  name = "github.com/inconshreveable/mousetrap"
  packages = ["."]
  revision = "76626ae9c91c4f2a10f34cad8ce83ea42c93bb75"

[[projects]]
  branch = "master"
  name = "github.com/juju/ratelimit"
  packages = ["."]
  revision = "5b9ff866471762aa2ab2dced63c9fb6f53921342"

[[projects]]
  branch = "master"
  name = "github.com/mailru/easyjson"
  packages = [
    "buffer",
    "jlexer",
    "jwriter"
  ]
  revision = "2af9a745a611440bab0528e5ac19b2805a1c50eb"

[[projects]]
  name = "github.com/matttproud/golang_protobuf_extensions"
  packages = ["pbutil"]
  revision = "3247c84500bff8d9fb6d579d800f20b3e091582c"
  version = "v1.0.0"

[[projects]]
  branch = "master"
  name = "github.com/mitchellh/mapstructure"
  packages = ["."]
  revision = "d0303fe809921458f417bcf828397a65db30a7e4"

[[projects]]
  branch = "master"
  name = "github.com/prometheus/client_golang"
  packages = [
    "api/prometheus",
    "prometheus",
    "prometheus/promhttp"
  ]
  revision = "5636dc67ae776adf5590da7349e70fbb9559972d"

[[projects]]
  branch = "master"
  name = "github.com/prometheus/client_model"
  packages = ["go"]
  revision = "6f3806018612930941127f2a7c6c453ba2c527d2"

[[projects]]
  branch = "master"
  name = "github.com/prometheus/common"
  packages = [
    "expfmt",
    "internal/bitbucket.org/ww/goautoneg",
    "model"
  ]
  revision = "ebdfc6da46522d58825777cf1f90490a5b1ef1d8"

[[projects]]
  branch = "master"
  name = "github.com/prometheus/procfs"
  packages = [
    ".",
    "xfs"
  ]
  revision = "e645f4e5aaa8506fc71d6edbc5c4ff02c04c46f2"

[[projects]]
  name = "github.com/sirupsen/logrus"
  packages = ["."]
  revision = "c155da19408a8799da419ed3eeb0cb5db0ad5dbc"
  version = "v1.0.5"

[[projects]]
  branch = "master"
  name = "github.com/spf13/cobra"
  packages = ["."]
  revision = "b24564e919247d7c870fe0ed3738c98d8741aca4"

[[projects]]
  branch = "master"
  name = "github.com/spf13/pflag"
  packages = ["."]
  revision = "367864438f1b1a3c7db4da06a2f55b144e6784e0"

[[projects]]
  branch = "master"
  name = "github.com/ugorji/go"
  packages = ["codec"]
  revision = "3487a5545b3d480987dfb0492035299077fab33a"

[[projects]]
  branch = "master"
  name = "golang.org/x/crypto"
  packages = ["ssh/terminal"]
  revision = "beb2a9779c3b677077c41673505f150149fce895"

[[projects]]
  branch = "master"
  name = "golang.org/x/net"
  packages = [
    "context",
    "context/ctxhttp",
    "http2",
    "http2/hpack",
    "idna"
  ]
  revision = "2a35e686583654a1b89ca79c4ac78cb3d6529ca3"

[[projects]]
  branch = "master"
  name = "golang.org/x/sys"
  packages = [
    "unix",
    "windows"
  ]
  revision = "3b87a42e500a6dc65dae1a55d0b641295971163e"

[[projects]]
  branch = "master"
  name = "golang.org/x/text"
  packages = [
    "internal/gen",
    "internal/triegen",
    "internal/ucd",
    "transform",
    "unicode/cldr",
    "unicode/norm",
    "width"
  ]
  revision = "a9a820217f98f7c8a207ec1e45a874e1fe12c478"

[[projects]]
  branch = "master"
  name = "gopkg.in/inf.v0"
  packages = ["."]
  revision = "3887ee99ecf07df5b447e9b00d9c0b2adaa9f3e4"

[[projects]]
  branch = "v2"
  name = "gopkg.in/mgo.v2"
  packages = [
    "bson",
    "internal/json"
  ]
  revision = "3f83fa5005286a7fe593b055f0d7771a7dce4655"

[[projects]]
  branch = "v2"
  name = "gopkg.in/yaml.v2"
  packages = ["."]
  revision = "cd8b52f8269e0feb286dfeef29f8fe4d5b397e0b"

[[projects]]
  branch = "release-1.7"
  name = "k8s.io/apimachinery"
  packages = [
    "pkg/api/equality",
    "pkg/api/errors",
    "pkg/api/meta",
    "pkg/api/resource",
    "pkg/apimachinery",
    "pkg/apimachinery/announced",
    "pkg/apimachinery/registered",
    "pkg/apis/meta/v1",
    "pkg/apis/meta/v1/unstructured",
    "pkg/apis/meta/v1alpha1",
    "pkg/conversion",
    "pkg/conversion/queryparams",
    "pkg/conversion/unstructured",
    "pkg/fields",
    "pkg/labels",
    "pkg/openapi",
    "pkg/runtime",
    "pkg/runtime/schema",
    "pkg/runtime/serializer",
    "pkg/runtime/serializer/json",
    "pkg/runtime/serializer/protobuf",
    "pkg/runtime/serializer/recognizer",
    "pkg/runtime/serializer/streaming",
    "pkg/runtime/serializer/versioning",
    "pkg/selection",
    "pkg/types",
    "pkg/util/clock",
    "pkg/util/diff",
    "pkg/util/errors",
    "pkg/util/framer",
    "pkg/util/intstr",
    "pkg/util/json",
    "pkg/util/net",
    "pkg/util/rand",
    "pkg/util/runtime",
    "pkg/util/sets",
    "pkg/util/validation",
    "pkg/util/validation/field",
    "pkg/util/wait",
    "pkg/util/yaml",
    "pkg/version",
    "pkg/watch",
    "third_party/forked/golang/reflect"
  ]
  revision = "8ab5f3d8a330c2e9baaf84e39042db8d49034ae2"

[[projects]]
  name = "k8s.io/client-go"
  packages = [
    "discovery",
    "kubernetes",
    "kubernetes/scheme",
    "kubernetes/typed/admissionregistration/v1alpha1",
    "kubernetes/typed/apps/v1beta1",
    "kubernetes/typed/authentication/v1",
    "kubernetes/typed/authentication/v1beta1",
    "kubernetes/typed/authorization/v1",
    "kubernetes/typed/authorization/v1beta1",
    "kubernetes/typed/autoscaling/v1",
    "kubernetes/typed/autoscaling/v2alpha1",
    "kubernetes/typed/batch/v1",
    "kubernetes/typed/batch/v2alpha1",
    "kubernetes/typed/certificates/v1beta1",
    "kubernetes/typed/core/v1",
    "kubernetes/typed/extensions/v1beta1",
    "kubernetes/typed/networking/v1",
    "kubernetes/typed/policy/v1beta1",
    "kubernetes/typed/rbac/v1alpha1",
    "kubernetes/typed/rbac/v1beta1",
    "kubernetes/typed/settings/v1alpha1",
    "kubernetes/typed/storage/v1",
    "kubernetes/typed/storage/v1beta1",
    "pkg/api",
    "pkg/api/v1",
    "pkg/api/v1/ref",
    "pkg/apis/admissionregistration",
    "pkg/apis/admissionregistration/v1alpha1",
    "pkg/apis/apps",
    "pkg/apis/apps/v1beta1",
    "pkg/apis/authentication",
    "pkg/apis/authentication/v1",
    "pkg/apis/authentication/v1beta1",
    "pkg/apis/authorization",
    "pkg/apis/authorization/v1",
    "pkg/apis/authorization/v1beta1",
    "pkg/apis/autoscaling",
    "pkg/apis/autoscaling/v1",
    "pkg/apis/autoscaling/v2alpha1",
    "pkg/apis/batch",
    "pkg/apis/batch/v1",
    "pkg/apis/batch/v2alpha1",
    "pkg/apis/certificates",
    "pkg/apis/certificates/v1beta1",
    "pkg/apis/extensions",
    "pkg/apis/extensions/v1beta1",
    "pkg/apis/networking",
    "pkg/apis/networking/v1",
    "pkg/apis/policy",
    "pkg/apis/policy/v1beta1",
    "pkg/apis/rbac",
    "pkg/apis/rbac/v1alpha1",
    "pkg/apis/rbac/v1beta1",
    "pkg/apis/settings",
    "pkg/apis/settings/v1alpha1",
    "pkg/apis/storage",
    "pkg/apis/storage/v1",
    "pkg/apis/storage/v1beta1",
    "pkg/util",
    "pkg/util/parsers",
    "pkg/version",
    "rest",
    "rest/watch",
    "tools/clientcmd/api",
    "tools/metrics",
    "transport",
    "util/cert",
    "util/flowcontrol",
    "util/integer"
  ]
  revision = "d92e8497f71b7b4e0494e5bd204b48d34bd6f254"
  version = "v4.0.0"

[solve-meta]
  analyzer-name = "dep"
  analyzer-version = 1
  inputs-digest = "029e6d2251ccbf5acdfc3bc0c36f340dc3a98511b0d7338c3e9bb167e412a155"
  solver-name = "gps-cdcl"
  solver-version = 1
Gopkg.toml (deleted, 24 lines)
@@ -1,24 +0,0 @@

[[constraint]]
  name = "github.com/sirupsen/logrus"
  version = "v1.0.5"

[[constraint]]
  branch = "master"
  name = "github.com/prometheus/client_golang"

[[constraint]]
  branch = "master"
  name = "github.com/prometheus/common"

[[constraint]]
  branch = "master"
  name = "github.com/spf13/cobra"

[[constraint]]
  name = "k8s.io/client-go"
  version = "v4.0.0"

[[constraint]]
  name = "k8s.io/apimachinery"
  branch = "release-1.7"
MAINTAINERS (new file, 5 lines)
@@ -0,0 +1,5 @@
Christian Kotzbauer <christian.kotzbauer@gmail.com> (@ckotzbauer)
Daniel Holbach <daniel.holbach@gmail.com> (@dholbach)
Hidde Beydals <hidde@weave.works> (@hiddeco)
Jack Francis <jackfrancis@gmail.com> (@jackfrancis)
Jean-Philippe Evrard <open-source@a.spamming.party> (@evrardjp)
Makefile (61 lines changed)
@@ -1,34 +1,53 @@
 .DEFAULT: all
-.PHONY: all clean image publish-image minikube-publish
+.PHONY: all clean image minikube-publish manifest test kured-all

-DH_ORG=weaveworks
-VERSION=$(shell git symbolic-ref --short HEAD)-$(shell git rev-parse --short HEAD)
+TEMPDIR=./.tmp
+GORELEASER_CMD=$(TEMPDIR)/goreleaser
+DH_ORG=kubereboot
+VERSION=$(shell git rev-parse --short HEAD)
+SUDO=$(shell docker info >/dev/null 2>&1 || echo "sudo -E")

 all: image

+$(TEMPDIR):
+	mkdir -p $(TEMPDIR)
+
+.PHONY: bootstrap-tools
+bootstrap-tools: $(TEMPDIR)
+	VERSION=v1.19.2 TMPDIR=.tmp bash .github/scripts/goreleaser-install.sh
+	curl -sSfL https://raw.githubusercontent.com/anchore/syft/main/install.sh | sh -s -- -b .tmp v0.86.1
+	curl -sSfL https://github.com/sigstore/cosign/releases/download/v2.1.1/cosign-linux-amd64 -o .tmp/cosign
+	chmod +x .tmp/goreleaser .tmp/cosign .tmp/syft
+
 clean:
 	go clean
 	rm -f cmd/kured/kured
 	rm -rf ./build
+	rm -rf ./dist

-godeps=$(shell go get $1 && go list -f '{{join .Deps "\n"}}' $1 | grep -v /vendor/ | xargs go list -f '{{if not .Standard}}{{ $$dep := . }}{{range .GoFiles}}{{$$dep.Dir}}/{{.}} {{end}}{{end}}')
+kured:
+	$(GORELEASER_CMD) build --clean --single-target --snapshot

-DEPS=$(call godeps,./cmd/kured)
+kured-all:
+	$(GORELEASER_CMD) build --clean --snapshot

-cmd/kured/kured: $(DEPS)
-cmd/kured/kured: cmd/kured/*.go
-	CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -ldflags "-X main.version=$(VERSION)" -o $@ cmd/kured/*.go
+kured-release-tag:
+	$(GORELEASER_CMD) release --clean

-build/.image.done: cmd/kured/Dockerfile cmd/kured/kured
-	mkdir -p build
-	cp $^ build
-	sudo -E docker build -t quay.io/$(DH_ORG)/kured:$(VERSION) -f build/Dockerfile ./build
-	touch $@
+kured-release-snapshot:
+	$(GORELEASER_CMD) release --clean --snapshot

-image: build/.image.done
-
-publish-image: image
-	sudo -E docker push quay.io/$(DH_ORG)/kured:$(VERSION)
+image: kured
+	$(SUDO) docker buildx build --load -t ghcr.io/$(DH_ORG)/kured:$(VERSION) .

 minikube-publish: image
-	sudo -E docker save quay.io/$(DH_ORG)/kured:$(VERSION) | (eval $$(minikube docker-env) && docker load)
+	$(SUDO) docker save ghcr.io/$(DH_ORG)/kured | (eval $$(minikube docker-env) && docker load)
+
+manifest:
+	sed -i "s#image: ghcr.io/.*kured.*#image: ghcr.io/$(DH_ORG)/kured:$(VERSION)#g" kured-ds.yaml
+	echo "Please generate combined manifest if necessary"
+
+test:
+	echo "Running go tests"
+	go test ./...
+	echo "Running golint on pkg"
+	golint ./pkg/...
+	echo "Running golint on cmd"
+	golint ./cmd/...
README.md (234 lines changed)
@@ -1,21 +1,17 @@
 # kured - Kubernetes Reboot Daemon

-<img src="https://github.com/weaveworks/kured/raw/master/img/logo.png" align="right"/>
+[](https://artifacthub.io/packages/helm/kured/kured)
+[](https://app.fossa.com/projects/git%2Bgithub.com%2Fkubereboot%2Fkured?ref=badge_shield)
+[](https://clomonitor.io/projects/cncf/kured)

-* [Introduction](#introduction)
-* [Kubernetes & OS Compatibility](#kubernetes-&-os-compatibility)
-* [Installation](#installation)
-* [Configuration](#configuration)
-  * [Reboot Sentinel File & Period](#reboot-sentinel-file-&-period)
-  * [Blocking Reboots via Alerts](#blocking-reboots-via-alerts)
-  * [Prometheus Metrics](#prometheus-metrics)
-  * [Slack Notifications](#slack-notifications)
-  * [Overriding Lock Configuration](#overriding-lock-configuration)
-* [Operation](#operation)
-  * [Testing](#testing)
-  * [Disabling Reboots](#disabling-reboots)
-  * [Manual Unlock](#manual-unlock)
-* [Building](#building)
-* [Getting Help](#getting-help)
+<img src="https://github.com/kubereboot/website/raw/main/static/img/kured.png" width="200" align="right"/>
+
+- [kured - Kubernetes Reboot Daemon](#kured---kubernetes-reboot-daemon)
+  - [Introduction](#introduction)
+  - [Documentation](#documentation)
+  - [Getting Help](#getting-help)
+  - [Trademarks](#trademarks)
+  - [License](#license)

 ## Introduction

@@ -23,188 +19,48 @@ Kured (KUbernetes REboot Daemon) is a Kubernetes daemonset that
 performs safe automatic node reboots when the need to do so is
 indicated by the package management system of the underlying OS.

-* Watches for the presence of a reboot sentinel e.g. `/var/run/reboot-required`
-* Utilises a lock in the API server to ensure only one node reboots at
+- Watches for the presence of a reboot sentinel file e.g. `/var/run/reboot-required`
+  or the successful run of a sentinel command.
+- Utilises a lock in the API server to ensure only one node reboots at
   a time
-* Optionally defers reboots in the presence of active Prometheus alerts
-* Cordons & drains worker nodes before reboot, uncordoning them after
+- Optionally defers reboots in the presence of active Prometheus alerts or selected pods
+- Cordons & drains worker nodes before reboot, uncordoning them after

-## Kubernetes & OS Compatibility
+## Documentation

-The daemon image contains a 1.7.x `k8s.io/client-go` and `kubectl`
-binary for the purposes of maintaining the lock and draining worker
-nodes. Whilst it has only been tested on a 1.7.x cluster, Kubernetes
-typically has good forwards/backwards compatibility so there is a
-reasonable chance it will work on adjacent versions; please file an
-issue if this is not the case.
+Find all our docs on <https://kured.dev>:

-Additionally, the image contains a `systemctl` binary from Ubuntu
-16.04 in order to command reboots. Again, although this has not been
-tested against other systemd distributions there is a good chance that
-it will work.
+- [All Kured Documentation](https://kured.dev/docs/)
+- [Installing Kured](https://kured.dev/docs/installation/)
+- [Configuring Kured](https://kured.dev/docs/configuration/)
+- [Operating Kured](https://kured.dev/docs/operation/)
+- [Developing Kured](https://kured.dev/docs/development/)

-## Installation
-
-To obtain a default installation without Prometheus alerting interlock
-or Slack notifications:
-
-```
-kubectl apply -f https://github.com/weaveworks/kured/releases/download/1.0.0/kured-ds.yaml
-```
-
-If you want to customise the installation, download the manifest and
-edit it in accordance with the following section before application.
-
-## Configuration
-
-The following arguments can be passed to kured via the daemonset pod template:
-
-```
-Flags:
-      --alert-filter-regexp value   alert names to ignore when checking for active alerts
-      --ds-name string              namespace containing daemonset on which to place lock (default "kube-system")
-      --ds-namespace string         name of daemonset on which to place lock (default "kured")
-      --lock-annotation string      annotation in which to record locking node (default "weave.works/kured-node-lock")
-      --period duration             reboot check period (default 1h0m0s)
-      --prometheus-url string       Prometheus instance to probe for active alerts
-      --reboot-sentinel string      path to file whose existence signals need to reboot (default "/var/run/reboot-required")
-      --slack-hook-url string       slack hook URL for reboot notfications
-      --slack-username string       slack username for reboot notfications (default "kured")
-```
-
-### Reboot Sentinel File & Period
-
-By default kured checks for the existence of
-`/var/run/reboot-required` every sixty minutes; you can override these
-values with `--reboot-sentinel` and `--period`. Each replica of the
-daemon uses a random offset derived from the period on startup so that
-nodes don't all contend for the lock simultaneously.
-
-### Blocking Reboots via Alerts
-
-You may find it desirable to block automatic node reboots when there
-are active alerts - you can do so by providing the URL of your
-Prometheus server:
-
-```
---prometheus-url=http://prometheus.monitoring.svc.cluster.local
-```
-
-By default the presence of *any* active (pending or firing) alerts
-will block reboots, however you can ignore specific alerts:
-
-```
---alert-filter-regexp=^(RebootRequired|AnotherBenignAlert|...$
-```
-
-An important application of this filter will become apparent in the
-next section.
-
-### Prometheus Metrics
-
-Each kured pod exposes a single gauge metric (`:8080/metrics`) that
-indicates the presence of the sentinel file:
-
-```
-# HELP kured_reboot_required OS requires reboot due to software updates.
-# TYPE kured_reboot_required gauge
-kured_reboot_required{node="ip-xxx-xxx-xxx-xxx.ec2.internal"} 0
-```
-
-The purpose of this metric is to power an alert which will summon an
-operator if the cluster cannot reboot itself automatically for a
-prolonged period:
-
-```
-# Alert if a reboot is required for any machines. Acts as a failsafe for the
-# reboot daemon, which will not reboot nodes if there are pending alerts save
-# this one.
-ALERT RebootRequired
-  IF          max(kured_reboot_required) != 0
-  FOR         24h
-  LABELS      { severity="warning" }
-  ANNOTATIONS {
-    summary = "Machine(s) require being rebooted, and the reboot daemon has failed to do so for 24 hours",
-    impact = "Cluster nodes more vulnerable to security exploits. Eventually, no disk space left.",
-    description = "Machine(s) require being rebooted, probably due to kernel update.",
-  }
-```
-
-If you choose to employ such an alert and have configured kured to
-probe for active alerts before rebooting, be sure to specify
-`--alert-filter-regexp=^RebootRequired$` to avoid deadlock!
-
-### Slack Notifications
-
-If you specify a Slack hook via `--slack-hook-url`, kured will notify
-you immediately prior to rebooting a node:
-
-<img src="https://github.com/weaveworks/kured/raw/master/img/slack-notification.png"/>
-
-We recommend setting `--slack-username` to be the name of the
-environment, e.g. `dev` or `prod`.
-
-### Overriding Lock Configuration
-
-The `--ds-name` and `--ds-namespace` arguments should match the name and
-namespace of the daemonset used to deploy the reboot daemon - the locking is
-implemented by means of an annotation on this resource. The defaults match
-the daemonset YAML provided in the repository.
-
-Similarly `--lock-annotation` can be used to change the name of the
-annotation kured will use to store the lock, but the default is almost
-certainly safe.
-
-## Operation
-
-The example commands in this section assume that you have not
-overriden the default lock annotation, daemonset name or namespace;
-if you have, you will have to adjust the commands accordingly.
-
-### Testing
-
-You can test your configuration by provoking a reboot on a node:
-
-```
-sudo touch /var/run/reboot-required
-```
-
-### Disabling Reboots
-
-If you need to temporarily stop kured from rebooting any nodes, you
-can take the lock manually:
-
-```
-kubectl -n kube-system annotate ds kured weave.works/kured-node-lock='{"nodeID":"manual"}'
-```
-
-Don't forget to release it afterwards!
-
-### Manual Unlock
-
-In exceptional circumstances, such as a node experiencing a permanent
-failure whilst rebooting, manual intervention may be required to
-remove the cluster lock:
-
-```
-kubectl -n kube-system annotate ds kured weave.works/kured-node-lock-
-```
-
-> NB the `-` at the end of the command is important - it instructs
-> `kubectl` to remove that annotation entirely.
-
-## Building
-
-```
-dep ensure && make
-```
+And there's much more!

 ## Getting Help

 If you have any questions about, feedback for or problems with `kured`:

-- Invite yourself to the <a href="https://weaveworks.github.io/community-slack/" target="_blank"> #weave-community </a> slack channel.
-- Ask a question on the <a href="https://weave-community.slack.com/messages/general/"> #weave-community</a> slack channel.
-- Send an email to <a href="mailto:weave-users@weave.works">weave-users@weave.works</a>
-- <a href="https://github.com/weaveworks/kured/issues/new">File an issue.</a>
+- Invite yourself to the <a href="https://slack.cncf.io/" target="_blank">CNCF Slack</a>.
+- Ask a question on the [#kured](https://cloud-native.slack.com/archives/kured) slack channel.
+- [File an issue](https://github.com/kubereboot/kured/issues/new).
+- Join us in [our monthly meeting](https://docs.google.com/document/d/1AWT8YDdqZY-Se6Y1oAlwtujWLVpNVK2M_F_Vfqw06aI/edit),
+  every first Wednesday of the month at 16:00 UTC.
+- You might want to [join the kured-dev mailing list](https://lists.cncf.io/g/cncf-kured-dev) as well.
+
+We follow the [CNCF Code of Conduct](CODE_OF_CONDUCT.md).

 Your feedback is always welcome!
+
+## Trademarks
+
+**Kured is a [Cloud Native Computing Foundation](https://cncf.io/) Sandbox project.**
+
+
+
+The Linux Foundation® (TLF) has registered trademarks and uses trademarks. For a list of TLF trademarks, see [Trademark Usage](https://www.linuxfoundation.org/trademark-usage/).
+
+## License
+
+[](https://app.fossa.com/projects/git%2Bgithub.com%2Fkubereboot%2Fkured?ref=badge_large)
cmd/kured/Dockerfile (deleted, 6 lines)
@@ -1,6 +0,0 @@
FROM ubuntu:16.04
RUN apt-get update && apt-get install -y ca-certificates && rm -rf /var/cache/apt
ADD https://storage.googleapis.com/kubernetes-release/release/v1.9.6/bin/linux/amd64/kubectl /usr/bin/kubectl
RUN chmod 0755 /usr/bin/kubectl
COPY ./kured /usr/bin/kured
ENTRYPOINT ["/usr/bin/kured"]
@@ -1,40 +1,89 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"math/rand"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"os"
|
||||
"os/exec"
|
||||
"reflect"
|
||||
"regexp"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
papi "github.com/prometheus/client_golang/api"
|
||||
log "github.com/sirupsen/logrus"
|
||||
"github.com/spf13/cobra"
|
||||
"github.com/spf13/pflag"
|
||||
"github.com/spf13/viper"
|
||||
v1 "k8s.io/api/core/v1"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/apimachinery/pkg/types"
|
||||
"k8s.io/client-go/kubernetes"
|
||||
"k8s.io/client-go/rest"
|
||||
kubectldrain "k8s.io/kubectl/pkg/drain"
|
||||
|
||||
"github.com/google/shlex"
|
||||
|
||||
shoutrrr "github.com/containrrr/shoutrrr"
|
||||
"github.com/kubereboot/kured/pkg/alerts"
|
||||
"github.com/kubereboot/kured/pkg/daemonsetlock"
|
||||
"github.com/kubereboot/kured/pkg/delaytick"
|
||||
"github.com/kubereboot/kured/pkg/taints"
|
||||
"github.com/kubereboot/kured/pkg/timewindow"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/client_golang/prometheus/promhttp"
|
||||
"github.com/weaveworks/kured/pkg/alerts"
|
||||
"github.com/weaveworks/kured/pkg/daemonsetlock"
|
||||
"github.com/weaveworks/kured/pkg/delaytick"
|
||||
"github.com/weaveworks/kured/pkg/notifications/slack"
|
||||
)
|
||||
|
||||
var (
|
||||
version = "unreleased"
|
||||
|
||||
// Command line flags
|
||||
period time.Duration
|
||||
dsNamespace string
|
||||
dsName string
|
||||
lockAnnotation string
|
||||
prometheusURL string
|
||||
alertFilter *regexp.Regexp
|
||||
rebootSentinel string
|
||||
slackHookURL string
|
||||
slackUsername string
|
||||
forceReboot bool
|
||||
drainTimeout time.Duration
|
||||
rebootDelay time.Duration
|
||||
period time.Duration
|
||||
metricsHost string
|
||||
metricsPort int
|
||||
drainGracePeriod int
|
||||
drainPodSelector string
|
||||
skipWaitForDeleteTimeoutSeconds int
|
||||
dsNamespace string
|
||||
dsName string
|
||||
lockAnnotation string
|
||||
lockTTL time.Duration
|
||||
lockReleaseDelay time.Duration
|
||||
prometheusURL string
|
||||
preferNoScheduleTaintName string
|
||||
alertFilter *regexp.Regexp
|
||||
alertFilterMatchOnly bool
|
||||
alertFiringOnly bool
|
||||
rebootSentinelFile string
|
||||
rebootSentinelCommand string
|
||||
notifyURL string
|
||||
slackHookURL string
|
||||
slackUsername string
|
||||
slackChannel string
|
||||
messageTemplateDrain string
|
||||
messageTemplateReboot string
|
||||
messageTemplateUncordon string
|
||||
podSelectors []string
|
||||
rebootCommand string
|
||||
logFormat string
|
||||
preRebootNodeLabels []string
|
||||
postRebootNodeLabels []string
|
||||
nodeID string
|
||||
concurrency int
|
||||
|
||||
rebootDays []string
|
||||
rebootStart string
|
||||
rebootEnd string
|
||||
timezone string
|
||||
annotateNodes bool
|
||||
|
||||
// Metrics
|
||||
rebootRequiredGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||
@@ -44,45 +93,212 @@ var (
|
||||
}, []string{"node"})
|
||||
)
|
||||
|
||||
const (
|
||||
// KuredNodeLockAnnotation is the canonical string value for the kured node-lock annotation
|
||||
KuredNodeLockAnnotation string = "weave.works/kured-node-lock"
|
||||
// KuredRebootInProgressAnnotation is the canonical string value for the kured reboot-in-progress annotation
|
||||
KuredRebootInProgressAnnotation string = "weave.works/kured-reboot-in-progress"
|
||||
// KuredMostRecentRebootNeededAnnotation is the canonical string value for the kured most-recent-reboot-needed annotation
|
||||
KuredMostRecentRebootNeededAnnotation string = "weave.works/kured-most-recent-reboot-needed"
|
||||
// EnvPrefix The environment variable prefix of all environment variables bound to our command line flags.
|
||||
EnvPrefix = "KURED"
|
||||
)
|
||||
|
||||
func init() {
|
||||
prometheus.MustRegister(rebootRequiredGauge)
|
||||
}
|
||||
|
||||
func main() {
|
||||
rootCmd := &cobra.Command{
|
||||
Use: "kured",
|
||||
Short: "Kubernetes Reboot Daemon",
|
||||
Run: root}
|
||||
cmd := NewRootCommand()
|
||||
|
||||
if err := cmd.Execute(); err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
// NewRootCommand construct the Cobra root command
|
||||
func NewRootCommand() *cobra.Command {
|
||||
rootCmd := &cobra.Command{
|
||||
Use: "kured",
|
||||
Short: "Kubernetes Reboot Daemon",
|
||||
PersistentPreRunE: bindViper,
|
||||
PreRun: flagCheck,
|
||||
Run: root}
|
||||
|
||||
rootCmd.PersistentFlags().StringVar(&nodeID, "node-id", "",
|
||||
"node name kured runs on, should be passed down from spec.nodeName via KURED_NODE_ID environment variable")
|
||||
rootCmd.PersistentFlags().BoolVar(&forceReboot, "force-reboot", false,
|
||||
"force a reboot even if the drain fails or times out")
|
||||
rootCmd.PersistentFlags().StringVar(&metricsHost, "metrics-host", "",
|
||||
"host where metrics will listen")
|
||||
rootCmd.PersistentFlags().IntVar(&metricsPort, "metrics-port", 8080,
|
||||
"port number where metrics will listen")
|
||||
rootCmd.PersistentFlags().IntVar(&drainGracePeriod, "drain-grace-period", -1,
|
||||
"time in seconds given to each pod to terminate gracefully, if negative, the default value specified in the pod will be used")
|
||||
rootCmd.PersistentFlags().StringVar(&drainPodSelector, "drain-pod-selector", "",
|
||||
"only drain pods with labels matching the selector (default: '', all pods)")
|
||||
rootCmd.PersistentFlags().IntVar(&skipWaitForDeleteTimeoutSeconds, "skip-wait-for-delete-timeout", 0,
|
||||
"when seconds is greater than zero, skip waiting for the pods whose deletion timestamp is older than N seconds while draining a node")
|
||||
rootCmd.PersistentFlags().DurationVar(&drainTimeout, "drain-timeout", 0,
|
||||
"timeout after which the drain is aborted (default: 0, infinite time)")
|
||||
rootCmd.PersistentFlags().DurationVar(&rebootDelay, "reboot-delay", 0,
|
||||
"delay reboot for this duration (default: 0, disabled)")
|
||||
rootCmd.PersistentFlags().DurationVar(&period, "period", time.Minute*60,
|
||||
"reboot check period")
|
||||
"sentinel check period")
|
||||
rootCmd.PersistentFlags().StringVar(&dsNamespace, "ds-namespace", "kube-system",
|
||||
"namespace containing daemonset on which to place lock")
|
||||
rootCmd.PersistentFlags().StringVar(&dsName, "ds-name", "kured",
|
||||
"name of daemonset on which to place lock")
|
||||
rootCmd.PersistentFlags().StringVar(&lockAnnotation, "lock-annotation", "weave.works/kured-node-lock",
|
||||
rootCmd.PersistentFlags().StringVar(&lockAnnotation, "lock-annotation", KuredNodeLockAnnotation,
|
||||
"annotation in which to record locking node")
|
||||
rootCmd.PersistentFlags().DurationVar(&lockTTL, "lock-ttl", 0,
|
||||
"expire lock annotation after this duration (default: 0, disabled)")
|
||||
rootCmd.PersistentFlags().DurationVar(&lockReleaseDelay, "lock-release-delay", 0,
|
||||
"delay lock release for this duration (default: 0, disabled)")
|
||||
rootCmd.PersistentFlags().StringVar(&prometheusURL, "prometheus-url", "",
|
||||
"Prometheus instance to probe for active alerts")
|
||||
rootCmd.PersistentFlags().Var(®expValue{&alertFilter}, "alert-filter-regexp",
|
||||
"alert names to ignore when checking for active alerts")
|
||||
rootCmd.PersistentFlags().StringVar(&rebootSentinel, "reboot-sentinel", "/var/run/reboot-required",
|
||||
"path to file whose existence signals need to reboot")
|
||||
rootCmd.PersistentFlags().BoolVar(&alertFilterMatchOnly, "alert-filter-match-only", false,
|
||||
"Only block if the alert-filter-regexp matches active alerts")
|
||||
rootCmd.PersistentFlags().BoolVar(&alertFiringOnly, "alert-firing-only", false,
|
||||
"only consider firing alerts when checking for active alerts")
|
||||
rootCmd.PersistentFlags().StringVar(&rebootSentinelFile, "reboot-sentinel", "/var/run/reboot-required",
|
||||
"path to file whose existence triggers the reboot command")
|
||||
rootCmd.PersistentFlags().StringVar(&preferNoScheduleTaintName, "prefer-no-schedule-taint", "",
|
||||
"Taint name applied during pending node reboot (to prevent receiving additional pods from other rebooting nodes). Disabled by default. Set e.g. to \"weave.works/kured-node-reboot\" to enable tainting.")
|
||||
rootCmd.PersistentFlags().StringVar(&rebootSentinelCommand, "reboot-sentinel-command", "",
|
||||
"command for which a zero return code will trigger a reboot command")
|
||||
rootCmd.PersistentFlags().StringVar(&rebootCommand, "reboot-command", "/bin/systemctl reboot",
|
||||
"command to run when a reboot is required")
|
||||
rootCmd.PersistentFlags().IntVar(&concurrency, "concurrency", 1,
|
||||
"amount of nodes to concurrently reboot. Defaults to 1")
|
||||
|
||||
rootCmd.PersistentFlags().StringVar(&slackHookURL, "slack-hook-url", "",
|
||||
"slack hook URL for reboot notfications")
|
||||
"slack hook URL for reboot notifications [deprecated in favor of --notify-url]")
|
||||
rootCmd.PersistentFlags().StringVar(&slackUsername, "slack-username", "kured",
|
||||
"slack username for reboot notfications")
|
||||
"slack username for reboot notifications")
|
||||
rootCmd.PersistentFlags().StringVar(&slackChannel, "slack-channel", "",
|
||||
"slack channel for reboot notifications")
|
||||
rootCmd.PersistentFlags().StringVar(¬ifyURL, "notify-url", "",
|
||||
"notify URL for reboot notifications (cannot use with --slack-hook-url flags)")
|
||||
rootCmd.PersistentFlags().StringVar(&messageTemplateUncordon, "message-template-uncordon", "Node %s rebooted & uncordoned successfully!",
|
||||
"message template used to notify about a node being successfully uncordoned")
|
||||
rootCmd.PersistentFlags().StringVar(&messageTemplateDrain, "message-template-drain", "Draining node %s",
|
||||
"message template used to notify about a node being drained")
|
||||
rootCmd.PersistentFlags().StringVar(&messageTemplateReboot, "message-template-reboot", "Rebooting node %s",
|
||||
"message template used to notify about a node being rebooted")
|
||||
|
||||
if err := rootCmd.Execute(); err != nil {
|
||||
log.Fatal(err)
|
||||
rootCmd.PersistentFlags().StringArrayVar(&podSelectors, "blocking-pod-selector", nil,
|
||||
"label selector identifying pods whose presence should prevent reboots")
|
||||
|
||||
rootCmd.PersistentFlags().StringSliceVar(&rebootDays, "reboot-days", timewindow.EveryDay,
|
||||
"schedule reboot on these days")
|
||||
rootCmd.PersistentFlags().StringVar(&rebootStart, "start-time", "0:00",
|
||||
"schedule reboot only after this time of day")
|
||||
rootCmd.PersistentFlags().StringVar(&rebootEnd, "end-time", "23:59:59",
|
||||
"schedule reboot only before this time of day")
|
||||
rootCmd.PersistentFlags().StringVar(&timezone, "time-zone", "UTC",
|
||||
"use this timezone for schedule inputs")
|
||||
|
||||
rootCmd.PersistentFlags().BoolVar(&annotateNodes, "annotate-nodes", false,
|
||||
"if set, the annotations 'weave.works/kured-reboot-in-progress' and 'weave.works/kured-most-recent-reboot-needed' will be given to nodes undergoing kured reboots")
|
||||
|
||||
rootCmd.PersistentFlags().StringVar(&logFormat, "log-format", "text",
|
||||
"use text or json log format")
|
||||
|
||||
rootCmd.PersistentFlags().StringSliceVar(&preRebootNodeLabels, "pre-reboot-node-labels", nil,
|
||||
"labels to add to nodes before cordoning")
|
||||
rootCmd.PersistentFlags().StringSliceVar(&postRebootNodeLabels, "post-reboot-node-labels", nil,
|
||||
"labels to add to nodes after uncordoning")
|
||||
|
||||
return rootCmd
|
||||
}
|
||||
|
||||
// func that checks for deprecated slack-notification-related flags and node labels that do not match
|
||||
func flagCheck(cmd *cobra.Command, args []string) {
|
||||
if slackHookURL != "" && notifyURL != "" {
|
||||
log.Warnf("Cannot use both --notify-url and --slack-hook-url flags. Kured will use --notify-url flag only...")
|
||||
}
|
||||
if notifyURL != "" {
|
||||
notifyURL = stripQuotes(notifyURL)
|
||||
} else if slackHookURL != "" {
|
||||
slackHookURL = stripQuotes(slackHookURL)
|
||||
log.Warnf("Deprecated flag(s). Please use --notify-url flag instead.")
|
||||
trataURL, err := url.Parse(slackHookURL)
|
||||
if err != nil {
|
||||
log.Warnf("slack-hook-url is not properly formatted... no notification will be sent: %v\n", err)
|
||||
}
|
||||
if len(strings.Split(strings.Trim(trataURL.Path, "/services/"), "/")) != 3 {
|
||||
log.Warnf("slack-hook-url is not properly formatted... no notification will be sent: unexpected number of / in URL\n")
|
||||
} else {
|
||||
notifyURL = fmt.Sprintf("slack://%s", strings.Trim(trataURL.Path, "/services/"))
|
||||
}
|
||||
}
|
||||
var preRebootNodeLabelKeys, postRebootNodeLabelKeys []string
|
||||
for _, label := range preRebootNodeLabels {
|
||||
preRebootNodeLabelKeys = append(preRebootNodeLabelKeys, strings.Split(label, "=")[0])
|
||||
}
|
||||
for _, label := range postRebootNodeLabels {
|
||||
postRebootNodeLabelKeys = append(postRebootNodeLabelKeys, strings.Split(label, "=")[0])
|
||||
}
|
||||
sort.Strings(preRebootNodeLabelKeys)
|
||||
sort.Strings(postRebootNodeLabelKeys)
|
||||
if !reflect.DeepEqual(preRebootNodeLabelKeys, postRebootNodeLabelKeys) {
|
||||
log.Warnf("pre-reboot-node-labels keys and post-reboot-node-labels keys do not match. This may result in unexpected behaviour.")
|
||||
}
|
||||
}
|
||||
|
||||
// stripQuotes removes any literal single or double quote chars that surround a string
|
||||
func stripQuotes(str string) string {
|
||||
if len(str) > 2 {
|
||||
firstChar := str[0]
|
||||
lastChar := str[len(str)-1]
|
||||
if firstChar == lastChar && (firstChar == '"' || firstChar == '\'') {
|
||||
return str[1 : len(str)-1]
|
||||
}
|
||||
}
|
||||
// return the original string if it has a length of zero or one
|
||||
return str
|
||||
}
|
||||
|
||||
// bindViper initializes viper and binds command flags with environment variables
|
||||
func bindViper(cmd *cobra.Command, args []string) error {
|
||||
v := viper.New()
|
||||
|
||||
v.SetEnvPrefix(EnvPrefix)
|
||||
v.AutomaticEnv()
|
||||
bindFlags(cmd, v)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// bindFlags binds each cobra flag to its associated viper configuration (environment variable)
|
||||
func bindFlags(cmd *cobra.Command, v *viper.Viper) {
|
||||
cmd.Flags().VisitAll(func(f *pflag.Flag) {
|
||||
// Environment variables can't have dashes in them, so bind them to their equivalent keys with underscores
|
||||
if strings.Contains(f.Name, "-") {
|
||||
v.BindEnv(f.Name, flagToEnvVar(f.Name))
|
||||
}
|
||||
|
||||
// Apply the viper config value to the flag when the flag is not set and viper has a value
|
||||
if !f.Changed && v.IsSet(f.Name) {
|
||||
val := v.Get(f.Name)
|
||||
log.Infof("Binding %s command flag to environment variable: %s", f.Name, flagToEnvVar(f.Name))
|
||||
cmd.Flags().Set(f.Name, fmt.Sprintf("%v", val))
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// flagToEnvVar converts command flag name to equivalent environment variable name
|
||||
func flagToEnvVar(flag string) string {
|
||||
envVarSuffix := strings.ToUpper(strings.ReplaceAll(flag, "-", "_"))
|
||||
return fmt.Sprintf("%s_%s", EnvPrefix, envVarSuffix)
|
||||
}
|
||||
|
||||
// newCommand creates a new Command with stdout/stderr wired to our standard logger
|
||||
func newCommand(name string, arg ...string) *exec.Cmd {
|
||||
cmd := exec.Command(name, arg...)
|
||||
|
||||
cmd.Stdout = log.NewEntry(log.StandardLogger()).
|
||||
WithField("cmd", cmd.Args[0]).
|
||||
WithField("std", "out").
|
||||
@@ -96,50 +312,130 @@ func newCommand(name string, arg ...string) *exec.Cmd {
|
||||
return cmd
|
||||
}
|
||||
|
||||
func sentinelExists() bool {
|
||||
_, err := os.Stat(rebootSentinel)
|
||||
switch {
|
||||
case err == nil:
|
||||
return true
|
||||
case os.IsNotExist(err):
|
||||
return false
|
||||
default:
|
||||
log.Fatalf("Unable to determine existence of sentinel: %v", err)
|
||||
return false // unreachable; prevents compilation error
|
||||
}
|
||||
// buildHostCommand writes a new command to run in the host namespace
|
||||
// Rancher based need different pid
|
||||
func buildHostCommand(pid int, command []string) []string {
|
||||
|
||||
// From the container, we nsenter into the proper PID to run the hostCommand.
|
||||
// For this, kured daemonset need to be configured with hostPID:true and privileged:true
|
||||
cmd := []string{"/usr/bin/nsenter", fmt.Sprintf("-m/proc/%d/ns/mnt", pid), "--"}
|
||||
cmd = append(cmd, command...)
|
||||
return cmd
|
||||
}
|
||||
|
||||
func rebootRequired() bool {
|
||||
if sentinelExists() {
|
||||
log.Infof("Reboot required")
|
||||
return true
|
||||
} else {
|
||||
log.Infof("Reboot not required")
|
||||
return false
|
||||
func rebootRequired(sentinelCommand []string) bool {
|
||||
cmd := newCommand(sentinelCommand[0], sentinelCommand[1:]...)
|
||||
if err := cmd.Run(); err != nil {
|
||||
switch err := err.(type) {
|
||||
case *exec.ExitError:
|
||||
// We assume a non-zero exit code means 'reboot not required', but of course
|
||||
// the user could have misconfigured the sentinel command or something else
|
||||
// went wrong during its execution. In that case, not entering a reboot loop
|
||||
// is the right thing to do, and we are logging stdout/stderr of the command
|
||||
// so it should be obvious what is wrong.
|
||||
if cmd.ProcessState.ExitCode() != 1 {
|
||||
log.Warnf("sentinel command ended with unexpected exit code: %v", cmd.ProcessState.ExitCode())
|
||||
}
|
||||
return false
|
||||
default:
|
||||
// Something was grossly misconfigured, such as the command path being wrong.
|
||||
log.Fatalf("Error invoking sentinel command: %v", err)
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func rebootBlocked() bool {
|
||||
if prometheusURL != "" {
|
||||
alertNames, err := alerts.PrometheusActiveAlerts(prometheusURL, alertFilter)
|
||||
// RebootBlocker interface should be implemented by types
|
||||
// to know if their instantiations should block a reboot
|
||||
type RebootBlocker interface {
|
||||
isBlocked() bool
|
||||
}
|
||||
|
||||
// PrometheusBlockingChecker contains info for connecting
|
||||
// to prometheus, and can give info about whether a reboot should be blocked
|
||||
type PrometheusBlockingChecker struct {
|
||||
// prometheusClient to make prometheus-go-client and api config available
|
||||
// into the PrometheusBlockingChecker struct
|
||||
promClient *alerts.PromClient
|
||||
// regexp used to get alerts
|
||||
filter *regexp.Regexp
|
||||
// bool to indicate if only firing alerts should be considered
|
||||
firingOnly bool
|
||||
// bool to indicate that we're only blocking on alerts which match the filter
|
||||
filterMatchOnly bool
|
||||
}
|
||||
|
||||
// KubernetesBlockingChecker contains info for connecting
|
||||
// to k8s, and can give info about whether a reboot should be blocked
|
||||
type KubernetesBlockingChecker struct {
|
||||
// client used to contact kubernetes API
|
||||
client *kubernetes.Clientset
|
||||
nodename string
|
||||
// lised used to filter pods (podSelector)
|
||||
filter []string
|
||||
}
|
||||
|
||||
func (pb PrometheusBlockingChecker) isBlocked() bool {
|
||||
alertNames, err := pb.promClient.ActiveAlerts(pb.filter, pb.firingOnly, pb.filterMatchOnly)
|
||||
if err != nil {
|
||||
log.Warnf("Reboot blocked: prometheus query error: %v", err)
|
||||
return true
|
||||
}
|
||||
count := len(alertNames)
|
||||
if count > 10 {
|
||||
alertNames = append(alertNames[:10], "...")
|
||||
}
|
||||
if count > 0 {
|
||||
log.Warnf("Reboot blocked: %d active alerts: %v", count, alertNames)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}

func (kb KubernetesBlockingChecker) isBlocked() bool {
	fieldSelector := fmt.Sprintf("spec.nodeName=%s,status.phase!=Succeeded,status.phase!=Failed,status.phase!=Unknown", kb.nodename)
	for _, labelSelector := range kb.filter {
		podList, err := kb.client.CoreV1().Pods("").List(context.TODO(), metav1.ListOptions{
			LabelSelector: labelSelector,
			FieldSelector: fieldSelector,
			Limit:         10})
		if err != nil {
			log.Warnf("Reboot blocked: prometheus query error: %v", err)
			log.Warnf("Reboot blocked: pod query error: %v", err)
			return true
		}
		count := len(alertNames)
		if count > 10 {
			alertNames = append(alertNames[:10], "...")
		}
		if count > 0 {
			log.Warnf("Reboot blocked: %d active alerts: %v", count, alertNames)

		if len(podList.Items) > 0 {
			podNames := make([]string, 0, len(podList.Items))
			for _, pod := range podList.Items {
				podNames = append(podNames, pod.Name)
			}
			if len(podList.Continue) > 0 {
				podNames = append(podNames, "...")
			}
			log.Warnf("Reboot blocked: matching pods: %v", podNames)
			return true
		}
	}
	return false
}

func holding(lock *daemonsetlock.DaemonSetLock, metadata interface{}) bool {
	holding, err := lock.Test(metadata)
func rebootBlocked(blockers ...RebootBlocker) bool {
	for _, blocker := range blockers {
		if blocker.isBlocked() {
			return true
		}
	}
	return false
}

func holding(lock *daemonsetlock.DaemonSetLock, metadata interface{}, isMultiLock bool) bool {
	var holding bool
	var err error
	if isMultiLock {
		holding, err = lock.TestMultiple()
	} else {
		holding, err = lock.Test(metadata)
	}
	if err != nil {
		log.Fatalf("Error testing lock: %v", err)
	}
@@ -149,8 +445,17 @@ func holding(lock *daemonsetlock.DaemonSetLock, metadata interface{}) bool {
	return holding
}

func acquire(lock *daemonsetlock.DaemonSetLock, metadata interface{}) bool {
	holding, holder, err := lock.Acquire(metadata)
func acquire(lock *daemonsetlock.DaemonSetLock, metadata interface{}, TTL time.Duration, maxOwners int) bool {
	var holding bool
	var holder string
	var err error
	if maxOwners > 1 {
		var holders []string
		holding, holders, err = lock.AcquireMultiple(metadata, TTL, maxOwners)
		holder = strings.Join(holders, ",")
	} else {
		holding, holder, err = lock.Acquire(metadata, TTL)
	}
	switch {
	case err != nil:
		log.Fatalf("Error acquiring lock: %v", err)
@@ -164,50 +469,103 @@ func acquire(lock *daemonsetlock.DaemonSetLock, metadata interface{}) bool {
	}
}

func release(lock *daemonsetlock.DaemonSetLock) {
func throttle(releaseDelay time.Duration) {
	if releaseDelay > 0 {
		log.Infof("Delaying lock release by %v", releaseDelay)
		time.Sleep(releaseDelay)
	}
}

func release(lock *daemonsetlock.DaemonSetLock, isMultiLock bool) {
	log.Infof("Releasing lock")
	if err := lock.Release(); err != nil {

	var err error
	if isMultiLock {
		err = lock.ReleaseMultiple()
	} else {
		err = lock.Release()
	}
	if err != nil {
		log.Fatalf("Error releasing lock: %v", err)
	}
}

func drain(nodeID string) {
	log.Infof("Draining node %s", nodeID)
	drainCmd := newCommand("/usr/bin/kubectl", "drain",
		"--ignore-daemonsets", "--delete-local-data", "--force", nodeID)
func drain(client *kubernetes.Clientset, node *v1.Node) error {
	nodename := node.GetName()

	if err := drainCmd.Run(); err != nil {
		log.Fatalf("Error invoking drain command: %v", err)
	if preRebootNodeLabels != nil {
		updateNodeLabels(client, node, preRebootNodeLabels)
	}
}

func uncordon(nodeID string) {
	log.Infof("Uncordoning node %s", nodeID)
	uncordonCmd := newCommand("/usr/bin/kubectl", "uncordon", nodeID)
	if err := uncordonCmd.Run(); err != nil {
		log.Fatalf("Error invoking uncordon command: %v", err)
	}
}
	log.Infof("Draining node %s", nodename)

func commandReboot(nodeID string) {
	log.Infof("Commanding reboot")

	if slackHookURL != "" {
		if err := slack.NotifyReboot(slackHookURL, slackUsername, nodeID); err != nil {
			log.Warnf("Error notifying slack: %v", err)
	if notifyURL != "" {
		if err := shoutrrr.Send(notifyURL, fmt.Sprintf(messageTemplateDrain, nodename)); err != nil {
			log.Warnf("Error notifying: %v", err)
		}
	}

	// Relies on /var/run/dbus/system_bus_socket bind mount to talk to systemd
	rebootCmd := newCommand("/bin/systemctl", "reboot")
	if err := rebootCmd.Run(); err != nil {
	drainer := &kubectldrain.Helper{
		Client:                          client,
		Ctx:                             context.Background(),
		GracePeriodSeconds:              drainGracePeriod,
		PodSelector:                     drainPodSelector,
		SkipWaitForDeleteTimeoutSeconds: skipWaitForDeleteTimeoutSeconds,
		Force:                           true,
		DeleteEmptyDirData:              true,
		IgnoreAllDaemonSets:             true,
		ErrOut:                          os.Stderr,
		Out:                             os.Stdout,
		Timeout:                         drainTimeout,
	}

	if err := kubectldrain.RunCordonOrUncordon(drainer, node, true); err != nil {
		log.Errorf("Error cordoning %s: %v", nodename, err)
		return err
	}

	if err := kubectldrain.RunNodeDrain(drainer, nodename); err != nil {
		log.Errorf("Error draining %s: %v", nodename, err)
		return err
	}
	return nil
}

func uncordon(client *kubernetes.Clientset, node *v1.Node) error {
	nodename := node.GetName()
	log.Infof("Uncordoning node %s", nodename)
	drainer := &kubectldrain.Helper{
		Client: client,
		ErrOut: os.Stderr,
		Out:    os.Stdout,
		Ctx:    context.Background(),
	}
	if err := kubectldrain.RunCordonOrUncordon(drainer, node, false); err != nil {
		log.Fatalf("Error uncordoning %s: %v", nodename, err)
		return err
	} else if postRebootNodeLabels != nil {
		updateNodeLabels(client, node, postRebootNodeLabels)
	}
	return nil
}

func invokeReboot(nodeID string, rebootCommand []string) {
	log.Infof("Running command: %s for node: %s", rebootCommand, nodeID)

	if notifyURL != "" {
		if err := shoutrrr.Send(notifyURL, fmt.Sprintf(messageTemplateReboot, nodeID)); err != nil {
			log.Warnf("Error notifying: %v", err)
		}
	}

	if err := newCommand(rebootCommand[0], rebootCommand[1:]...).Run(); err != nil {
		log.Fatalf("Error invoking reboot command: %v", err)
	}
}

func maintainRebootRequiredMetric(nodeID string) {
func maintainRebootRequiredMetric(nodeID string, sentinelCommand []string) {
	for {
		if sentinelExists() {
		if rebootRequired(sentinelCommand) {
			rebootRequiredGauge.WithLabelValues(nodeID).Set(1)
		} else {
			rebootRequiredGauge.WithLabelValues(nodeID).Set(0)
@@ -221,7 +579,81 @@ type nodeMeta struct {
	Unschedulable bool `json:"unschedulable"`
}

func rebootAsRequired(nodeID string) {
func addNodeAnnotations(client *kubernetes.Clientset, nodeID string, annotations map[string]string) error {
	node, err := client.CoreV1().Nodes().Get(context.TODO(), nodeID, metav1.GetOptions{})
	if err != nil {
		log.Errorf("Error retrieving node object via k8s API: %s", err)
		return err
	}
	for k, v := range annotations {
		node.Annotations[k] = v
		log.Infof("Adding node %s annotation: %s=%s", node.GetName(), k, v)
	}

	bytes, err := json.Marshal(node)
	if err != nil {
		log.Errorf("Error marshalling node object into JSON: %v", err)
		return err
	}

	_, err = client.CoreV1().Nodes().Patch(context.TODO(), node.GetName(), types.StrategicMergePatchType, bytes, metav1.PatchOptions{})
	if err != nil {
		var annotationsErr string
		for k, v := range annotations {
			annotationsErr += fmt.Sprintf("%s=%s ", k, v)
		}
		log.Errorf("Error adding node annotations %s via k8s API: %v", annotationsErr, err)
		return err
	}
	return nil
}

func deleteNodeAnnotation(client *kubernetes.Clientset, nodeID, key string) error {
	log.Infof("Deleting node %s annotation %s", nodeID, key)

	// JSON Patch takes as path input a JSON Pointer, defined in RFC6901
	// So we replace all instances of "/" with "~1" as per:
	// https://tools.ietf.org/html/rfc6901#section-3
	patch := []byte(fmt.Sprintf("[{\"op\":\"remove\",\"path\":\"/metadata/annotations/%s\"}]", strings.ReplaceAll(key, "/", "~1")))
	_, err := client.CoreV1().Nodes().Patch(context.TODO(), nodeID, types.JSONPatchType, patch, metav1.PatchOptions{})
	if err != nil {
		log.Errorf("Error deleting node annotation %s via k8s API: %v", key, err)
		return err
	}
	return nil
}
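Concretely, for a key such as weave.works/kured-reboot-in-progress, the escaping above yields a JSON Patch like the following (shown for illustration):

	[{"op":"remove","path":"/metadata/annotations/weave.works~1kured-reboot-in-progress"}]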

func updateNodeLabels(client *kubernetes.Clientset, node *v1.Node, labels []string) {
	labelsMap := make(map[string]string)
	for _, label := range labels {
		k := strings.Split(label, "=")[0]
		v := strings.Split(label, "=")[1]
		labelsMap[k] = v
		log.Infof("Updating node %s label: %s=%s", node.GetName(), k, v)
	}

	bytes, err := json.Marshal(map[string]interface{}{
		"metadata": map[string]interface{}{
			"labels": labelsMap,
		},
	})
	if err != nil {
		log.Fatalf("Error marshalling node object into JSON: %v", err)
	}

	_, err = client.CoreV1().Nodes().Patch(context.TODO(), node.GetName(), types.StrategicMergePatchType, bytes, metav1.PatchOptions{})
	if err != nil {
		var labelsErr string
		for _, label := range labels {
			k := strings.Split(label, "=")[0]
			v := strings.Split(label, "=")[1]
			labelsErr += fmt.Sprintf("%s=%s ", k, v)
		}
		log.Errorf("Error updating node labels %s via k8s API: %v", labelsErr, err)
	}
}
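Note that the parsing above assumes each label argument has exactly the form key=value; a value containing a further "=" would be truncated by strings.Split as written. A defensive variant (a sketch, not part of this change) would split only on the first separator:

	// parts[0] is the key; parts[1] keeps any later "=" intact.
	parts := strings.SplitN(label, "=", 2)
	k, v := parts[0], parts[1]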

func rebootAsRequired(nodeID string, rebootCommand []string, sentinelCommand []string, window *timewindow.TimeWindow, TTL time.Duration, releaseDelay time.Duration) {
	config, err := rest.InClusterConfig()
	if err != nil {
		log.Fatal(err)
@@ -235,52 +667,217 @@ func rebootAsRequired(nodeID string) {
	lock := daemonsetlock.New(client, nodeID, dsNamespace, dsName, lockAnnotation)

	nodeMeta := nodeMeta{}
	if holding(lock, &nodeMeta) {
		if !nodeMeta.Unschedulable {
			uncordon(nodeID)
	source := rand.NewSource(time.Now().UnixNano())
	tick := delaytick.New(source, 1*time.Minute)
	for range tick {
		if holding(lock, &nodeMeta, concurrency > 1) {
			node, err := client.CoreV1().Nodes().Get(context.TODO(), nodeID, metav1.GetOptions{})
			if err != nil {
				log.Errorf("Error retrieving node object via k8s API: %v", err)
				continue
			}
			if !nodeMeta.Unschedulable {
				err = uncordon(client, node)
				if err != nil {
					log.Errorf("Unable to uncordon %s: %v, will continue to hold lock and retry uncordon", node.GetName(), err)
					continue
				} else {
					if notifyURL != "" {
						if err := shoutrrr.Send(notifyURL, fmt.Sprintf(messageTemplateUncordon, nodeID)); err != nil {
							log.Warnf("Error notifying: %v", err)
						}
					}
				}
			}
			// If we're holding the lock we know we've tried, in a prior run, to reboot
			// So (1) we want to confirm that the reboot succeeded practically ( !rebootRequired() )
			// And (2) check if we previously annotated the node that it was in the process of being rebooted,
			// And finally (3) if it has that annotation, to delete it.
			// This indicates to other node tools running on the cluster that this node may be a candidate for maintenance
			if annotateNodes && !rebootRequired(sentinelCommand) {
				if _, ok := node.Annotations[KuredRebootInProgressAnnotation]; ok {
					err := deleteNodeAnnotation(client, nodeID, KuredRebootInProgressAnnotation)
					if err != nil {
						continue
					}
				}
			}
			throttle(releaseDelay)
			release(lock, concurrency > 1)
			break
		} else {
			break
		}
		release(lock)
	}

	source := rand.NewSource(time.Now().UnixNano())
	tick := delaytick.New(source, period)
	for _ = range tick {
		if rebootRequired() && !rebootBlocked() {
			node, err := client.CoreV1().Nodes().Get(nodeID, metav1.GetOptions{})
			if err != nil {
				log.Fatal(err)
			}
			nodeMeta.Unschedulable = node.Spec.Unschedulable
	preferNoScheduleTaint := taints.New(client, nodeID, preferNoScheduleTaintName, v1.TaintEffectPreferNoSchedule)

			if acquire(lock, &nodeMeta) {
				if !nodeMeta.Unschedulable {
					drain(nodeID)
				}
				commandReboot(nodeID)
				for {
					log.Infof("Waiting for reboot")
					time.Sleep(time.Minute)
	// Remove taint immediately during startup to quickly allow scheduling again.
	if !rebootRequired(sentinelCommand) {
		preferNoScheduleTaint.Disable()
	}

	// instantiate prometheus client
	promClient, err := alerts.NewPromClient(papi.Config{Address: prometheusURL})
	if err != nil {
		log.Fatal("Unable to create prometheus client: ", err)
	}

	source = rand.NewSource(time.Now().UnixNano())
	tick = delaytick.New(source, period)
	for range tick {
		if !window.Contains(time.Now()) {
			// Remove taint outside the reboot time window to allow for normal operation.
			preferNoScheduleTaint.Disable()
			continue
		}

		if !rebootRequired(sentinelCommand) {
			log.Infof("Reboot not required")
			preferNoScheduleTaint.Disable()
			continue
		}

		node, err := client.CoreV1().Nodes().Get(context.TODO(), nodeID, metav1.GetOptions{})
		if err != nil {
			log.Fatalf("Error retrieving node object via k8s API: %v", err)
		}
		nodeMeta.Unschedulable = node.Spec.Unschedulable

		var timeNowString string
		if annotateNodes {
			if _, ok := node.Annotations[KuredRebootInProgressAnnotation]; !ok {
				timeNowString = time.Now().Format(time.RFC3339)
				// Annotate this node to indicate that "I am going to be rebooted!"
				// so that other node maintenance tools running on the cluster are aware that this node is in the process of a "state transition"
				annotations := map[string]string{KuredRebootInProgressAnnotation: timeNowString}
				// & annotate this node with a timestamp so that other node maintenance tools know how long it's been since this node has been marked for reboot
				annotations[KuredMostRecentRebootNeededAnnotation] = timeNowString
				err := addNodeAnnotations(client, nodeID, annotations)
				if err != nil {
					continue
				}
			}
		}

		var blockCheckers []RebootBlocker
		if prometheusURL != "" {
			blockCheckers = append(blockCheckers, PrometheusBlockingChecker{promClient: promClient, filter: alertFilter, firingOnly: alertFiringOnly, filterMatchOnly: alertFilterMatchOnly})
		}
		if podSelectors != nil {
			blockCheckers = append(blockCheckers, KubernetesBlockingChecker{client: client, nodename: nodeID, filter: podSelectors})
		}

		var rebootRequiredBlockCondition string
		if rebootBlocked(blockCheckers...) {
			rebootRequiredBlockCondition = ", but blocked at this time"
			continue
		}
		log.Infof("Reboot required%s", rebootRequiredBlockCondition)

		if !holding(lock, &nodeMeta, concurrency > 1) && !acquire(lock, &nodeMeta, TTL, concurrency) {
			// Prefer to not schedule pods onto this node to avoid draining the same pod multiple times.
			preferNoScheduleTaint.Enable()
			continue
		}

		err = drain(client, node)
		if err != nil {
			if !forceReboot {
				log.Errorf("Unable to cordon or drain %s: %v, will release lock and retry cordon and drain before rebooting when lock is next acquired", node.GetName(), err)
				release(lock, concurrency > 1)
				log.Infof("Performing a best-effort uncordon after failed cordon and drain")
				uncordon(client, node)
				continue
			}
		}

		if rebootDelay > 0 {
			log.Infof("Delaying reboot for %v", rebootDelay)
			time.Sleep(rebootDelay)
		}

		invokeReboot(nodeID, rebootCommand)
		for {
			log.Infof("Waiting for reboot")
			time.Sleep(time.Minute)
		}
	}
}

// buildSentinelCommand creates the shell command line which will need wrapping to escape
// the container boundaries
func buildSentinelCommand(rebootSentinelFile string, rebootSentinelCommand string) []string {
	if rebootSentinelCommand != "" {
		cmd, err := shlex.Split(rebootSentinelCommand)
		if err != nil {
			log.Fatalf("Error parsing provided sentinel command: %v", err)
		}
		return cmd
	}
	return []string{"test", "-f", rebootSentinelFile}
}
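For example (mirroring the unit tests further down):

	buildSentinelCommand("/var/run/reboot-required", "")       // ["test", "-f", "/var/run/reboot-required"]
	buildSentinelCommand("/test1", "/sbin/reboot-required -r") // ["/sbin/reboot-required", "-r"]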

// parseRebootCommand creates the shell command line which will need wrapping to escape
// the container boundaries
func parseRebootCommand(rebootCommand string) []string {
	command, err := shlex.Split(rebootCommand)
	if err != nil {
		log.Fatalf("Error parsing provided reboot command: %v", err)
	}
	return command
}

func root(cmd *cobra.Command, args []string) {
	if logFormat == "json" {
		log.SetFormatter(&log.JSONFormatter{})
	}

	log.Infof("Kubernetes Reboot Daemon: %s", version)

	nodeID := os.Getenv("KURED_NODE_ID")
	if nodeID == "" {
		log.Fatal("KURED_NODE_ID environment variable required")
	}

	window, err := timewindow.New(rebootDays, rebootStart, rebootEnd, timezone)
	if err != nil {
		log.Fatalf("Failed to build time window: %v", err)
	}

	sentinelCommand := buildSentinelCommand(rebootSentinelFile, rebootSentinelCommand)
	restartCommand := parseRebootCommand(rebootCommand)

	log.Infof("Node ID: %s", nodeID)
	log.Infof("Lock Annotation: %s/%s:%s", dsNamespace, dsName, lockAnnotation)
	log.Infof("Reboot Sentinel: %s every %v", rebootSentinel, period)
	if lockTTL > 0 {
		log.Infof("Lock TTL set, lock will expire after: %v", lockTTL)
	} else {
		log.Info("Lock TTL not set, lock will remain until being released")
	}
	if lockReleaseDelay > 0 {
		log.Infof("Lock release delay set, lock release will be delayed by: %v", lockReleaseDelay)
	} else {
		log.Info("Lock release delay not set, lock will be released immediately after rebooting")
	}
	log.Infof("PreferNoSchedule taint: %s", preferNoScheduleTaintName)
	log.Infof("Blocking Pod Selectors: %v", podSelectors)
	log.Infof("Reboot schedule: %v", window)
	log.Infof("Reboot check command: %s every %v", sentinelCommand, period)
	log.Infof("Concurrency: %v", concurrency)
	log.Infof("Reboot command: %s", restartCommand)
	if annotateNodes {
		log.Infof("Will annotate nodes during kured reboot operations")
	}

	go rebootAsRequired(nodeID)
	go maintainRebootRequiredMetric(nodeID)
	// To run those commands as if we were on the host, we'll use nsenter
	// Relies on hostPID:true and privileged:true to enter host mount space
	// PID set to 1, until we have a better discovery mechanism.
	hostSentinelCommand := buildHostCommand(1, sentinelCommand)
	hostRestartCommand := buildHostCommand(1, restartCommand)

	go rebootAsRequired(nodeID, hostRestartCommand, hostSentinelCommand, window, lockTTL, lockReleaseDelay)
	go maintainRebootRequiredMetric(nodeID, hostSentinelCommand)

	http.Handle("/metrics", promhttp.Handler())
	log.Fatal(http.ListenAndServe(":8080", nil))
	log.Fatal(http.ListenAndServe(fmt.Sprintf("%s:%d", metricsHost, metricsPort), nil))
}
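The effect of the nsenter wrapping is visible in Test_buildHostCommand below: the default sentinel check, for instance, becomes

	/usr/bin/nsenter -m/proc/1/ns/mnt -- test -f /var/run/reboot-required

so the command runs in the host's mount namespace rather than the container's.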

310
cmd/kured/main_test.go
Normal file
@@ -0,0 +1,310 @@
package main

import (
	"reflect"
	"testing"

	log "github.com/sirupsen/logrus"
	"github.com/spf13/cobra"
	"github.com/kubereboot/kured/pkg/alerts"
	assert "gotest.tools/v3/assert"

	papi "github.com/prometheus/client_golang/api"
)

type BlockingChecker struct {
	blocking bool
}

func (fbc BlockingChecker) isBlocked() bool {
	return fbc.blocking
}

var _ RebootBlocker = BlockingChecker{}       // Verify that Type implements Interface.
var _ RebootBlocker = (*BlockingChecker)(nil) // Verify that *Type implements Interface.

func Test_flagCheck(t *testing.T) {
	var cmd *cobra.Command
	var args []string
	slackHookURL = "https://hooks.slack.com/services/BLABLABA12345/IAM931A0VERY/COMPLICATED711854TOKEN1SET"
	expected := "slack://BLABLABA12345/IAM931A0VERY/COMPLICATED711854TOKEN1SET"
	flagCheck(cmd, args)
	if notifyURL != expected {
		t.Errorf("Slack URL Parsing is wrong: expecting %s but got %s\n", expected, notifyURL)
	}

	// validate that surrounding quotes are stripped
	slackHookURL = "\"https://hooks.slack.com/services/BLABLABA12345/IAM931A0VERY/COMPLICATED711854TOKEN1SET\""
	expected = "slack://BLABLABA12345/IAM931A0VERY/COMPLICATED711854TOKEN1SET"
	flagCheck(cmd, args)
	if notifyURL != expected {
		t.Errorf("Slack URL Parsing is wrong: expecting %s but got %s\n", expected, notifyURL)
	}
	slackHookURL = "'https://hooks.slack.com/services/BLABLABA12345/IAM931A0VERY/COMPLICATED711854TOKEN1SET'"
	expected = "slack://BLABLABA12345/IAM931A0VERY/COMPLICATED711854TOKEN1SET"
	flagCheck(cmd, args)
	if notifyURL != expected {
		t.Errorf("Slack URL Parsing is wrong: expecting %s but got %s\n", expected, notifyURL)
	}
	slackHookURL = ""
	notifyURL = "\"teams://79b4XXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX@acd8XXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX/204cXXXXXXXXXXXXXXXXXXXXXXXXXXXX/a1f8XXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX?host=XXXX.webhook.office.com\""
	expected = "teams://79b4XXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX@acd8XXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX/204cXXXXXXXXXXXXXXXXXXXXXXXXXXXX/a1f8XXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX?host=XXXX.webhook.office.com"
	flagCheck(cmd, args)
	if notifyURL != expected {
		t.Errorf("notifyURL Parsing is wrong: expecting %s but got %s\n", expected, notifyURL)
	}
	notifyURL = "'teams://79b4XXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX@acd8XXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX/204cXXXXXXXXXXXXXXXXXXXXXXXXXXXX/a1f8XXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX?host=XXXX.webhook.office.com'"
	expected = "teams://79b4XXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX@acd8XXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX/204cXXXXXXXXXXXXXXXXXXXXXXXXXXXX/a1f8XXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX?host=XXXX.webhook.office.com"
	flagCheck(cmd, args)
	if notifyURL != expected {
		t.Errorf("notifyURL Parsing is wrong: expecting %s but got %s\n", expected, notifyURL)
	}
}

func Test_stripQuotes(t *testing.T) {
	tests := []struct {
		name     string
		input    string
		expected string
	}{
		{
			name:     "string with no surrounding quotes is unchanged",
			input:    "Hello, world!",
			expected: "Hello, world!",
		},
		{
			name:     "string with surrounding double quotes should strip quotes",
			input:    "\"Hello, world!\"",
			expected: "Hello, world!",
		},
		{
			name:     "string with surrounding single quotes should strip quotes",
			input:    "'Hello, world!'",
			expected: "Hello, world!",
		},
		{
			name:     "string with unbalanced surrounding quotes is unchanged",
			input:    "'Hello, world!\"",
			expected: "'Hello, world!\"",
		},
		{
			name:     "string with length of one is unchanged",
			input:    "'",
			expected: "'",
		},
		{
			name:     "string with length of zero is unchanged",
			input:    "",
			expected: "",
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			if got := stripQuotes(tt.input); !reflect.DeepEqual(got, tt.expected) {
				t.Errorf("stripQuotes() = %v, expected %v", got, tt.expected)
			}
		})
	}
}

func Test_rebootBlocked(t *testing.T) {
	noCheckers := []RebootBlocker{}
	nonblockingChecker := BlockingChecker{blocking: false}
	blockingChecker := BlockingChecker{blocking: true}

	// Instantiate a prometheusClient with a broken_url
	promClient, err := alerts.NewPromClient(papi.Config{Address: "broken_url"})
	if err != nil {
		log.Fatal("Can't create prometheusClient: ", err)
	}
	brokenPrometheusClient := PrometheusBlockingChecker{promClient: promClient, filter: nil, firingOnly: false}

	type args struct {
		blockers []RebootBlocker
	}
	tests := []struct {
		name string
		args args
		want bool
	}{
		{
			name: "Do not block on no blocker defined",
			args: args{blockers: noCheckers},
			want: false,
		},
		{
			name: "Ensure a blocker blocks",
			args: args{blockers: []RebootBlocker{blockingChecker}},
			want: true,
		},
		{
			name: "Ensure a non-blocker doesn't block",
			args: args{blockers: []RebootBlocker{nonblockingChecker}},
			want: false,
		},
		{
			name: "Ensure one blocker is enough to block",
			args: args{blockers: []RebootBlocker{nonblockingChecker, blockingChecker}},
			want: true,
		},
		{
			name: "Do block on error contacting prometheus API",
			args: args{blockers: []RebootBlocker{brokenPrometheusClient}},
			want: true,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			if got := rebootBlocked(tt.args.blockers...); got != tt.want {
				t.Errorf("rebootBlocked() = %v, want %v", got, tt.want)
			}
		})
	}
}

func Test_buildHostCommand(t *testing.T) {
	type args struct {
		pid     int
		command []string
	}
	tests := []struct {
		name string
		args args
		want []string
	}{
		{
			name: "Ensure command will run with nsenter",
			args: args{pid: 1, command: []string{"ls", "-Fal"}},
			want: []string{"/usr/bin/nsenter", "-m/proc/1/ns/mnt", "--", "ls", "-Fal"},
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			if got := buildHostCommand(tt.args.pid, tt.args.command); !reflect.DeepEqual(got, tt.want) {
				t.Errorf("buildHostCommand() = %v, want %v", got, tt.want)
			}
		})
	}
}

func Test_buildSentinelCommand(t *testing.T) {
	type args struct {
		rebootSentinelFile    string
		rebootSentinelCommand string
	}
	tests := []struct {
		name string
		args args
		want []string
	}{
		{
			name: "Ensure a sentinelFile generates a shell 'test' command with the right file",
			args: args{
				rebootSentinelFile:    "/test1",
				rebootSentinelCommand: "",
			},
			want: []string{"test", "-f", "/test1"},
		},
		{
			name: "Ensure a sentinelCommand has priority over a sentinelFile if both are provided (because sentinelFile is always provided)",
			args: args{
				rebootSentinelFile:    "/test1",
				rebootSentinelCommand: "/sbin/reboot-required -r",
			},
			want: []string{"/sbin/reboot-required", "-r"},
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			if got := buildSentinelCommand(tt.args.rebootSentinelFile, tt.args.rebootSentinelCommand); !reflect.DeepEqual(got, tt.want) {
				t.Errorf("buildSentinelCommand() = %v, want %v", got, tt.want)
			}
		})
	}
}

func Test_parseRebootCommand(t *testing.T) {
	type args struct {
		rebootCommand string
	}
	tests := []struct {
		name string
		args args
		want []string
	}{
		{
			name: "Ensure a reboot command is properly parsed",
			args: args{
				rebootCommand: "/sbin/systemctl reboot",
			},
			want: []string{"/sbin/systemctl", "reboot"},
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			if got := parseRebootCommand(tt.args.rebootCommand); !reflect.DeepEqual(got, tt.want) {
				t.Errorf("parseRebootCommand() = %v, want %v", got, tt.want)
			}
		})
	}
}

func Test_rebootRequired(t *testing.T) {
	type args struct {
		sentinelCommand []string
	}
	tests := []struct {
		name string
		args args
		want bool
	}{
		{
			name: "Ensure rc = 0 means reboot required",
			args: args{
				sentinelCommand: []string{"true"},
			},
			want: true,
		},
		{
			name: "Ensure rc != 0 means reboot NOT required",
			args: args{
				sentinelCommand: []string{"false"},
			},
			want: false,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			if got := rebootRequired(tt.args.sentinelCommand); got != tt.want {
				t.Errorf("rebootRequired() = %v, want %v", got, tt.want)
			}
		})
	}
}

func Test_rebootRequired_fatals(t *testing.T) {
	cases := []struct {
		param       []string
		expectFatal bool
	}{
		{
			param:       []string{"true"},
			expectFatal: false,
		},
		{
			param:       []string{"./babar"},
			expectFatal: true,
		},
	}

	defer func() { log.StandardLogger().ExitFunc = nil }()
	var fatal bool
	log.StandardLogger().ExitFunc = func(int) { fatal = true }

	for _, c := range cases {
		fatal = false
		rebootRequired(c.param)
		assert.Equal(t, c.expectFatal, fatal)
	}
}
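The test above works because logrus exposes an ExitFunc hook on its standard logger, so a test can observe log.Fatalf without the process exiting. The pattern is reusable for any code path that may fatal:

	defer func() { log.StandardLogger().ExitFunc = nil }()
	var fatal bool
	log.StandardLogger().ExitFunc = func(int) { fatal = true }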

@@ -1,23 +0,0 @@
package main

import (
	"fmt"
	"log"
	"os"
	"regexp"

	"github.com/weaveworks/kured/pkg/alerts"
)

func main() {
	if len(os.Args) != 3 {
		log.Fatalf("USAGE: %s <prometheusURL> <filterRegexp>", os.Args[0])
	}

	count, err := alerts.PrometheusCountActive(os.Args[1], regexp.MustCompile(os.Args[2]))
	if err != nil {
		log.Fatal(err)
	}

	fmt.Println(count)
}

105
go.mod
Normal file
@@ -0,0 +1,105 @@
module github.com/kubereboot/kured

go 1.20

replace golang.org/x/net => golang.org/x/net v0.7.0

replace github.com/emicklei/go-restful/v3 => github.com/emicklei/go-restful/v3 v3.10.2

require (
	github.com/containrrr/shoutrrr v0.7.1
	github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510
	github.com/google/uuid v1.3.0 // indirect
	github.com/prometheus/client_golang v1.16.0
	github.com/prometheus/common v0.44.0
	github.com/sirupsen/logrus v1.9.3
	github.com/spf13/cobra v1.7.0
	github.com/spf13/pflag v1.0.5
	github.com/spf13/viper v1.16.0
	github.com/stretchr/testify v1.8.4
	gotest.tools/v3 v3.5.0
	k8s.io/api v0.27.4
	k8s.io/apimachinery v0.27.4
	k8s.io/client-go v0.27.4
	k8s.io/kubectl v0.27.4
)

require (
	github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1 // indirect
	github.com/MakeNowJust/heredoc v1.0.0 // indirect
	github.com/beorn7/perks v1.0.1 // indirect
	github.com/cespare/xxhash/v2 v2.2.0 // indirect
	github.com/chai2010/gettext-go v1.0.2 // indirect
	github.com/davecgh/go-spew v1.1.1 // indirect
	github.com/emicklei/go-restful/v3 v3.9.0 // indirect
	github.com/evanphx/json-patch v4.12.0+incompatible // indirect
	github.com/exponent-io/jsonpath v0.0.0-20151013193312-d6023ce2651d // indirect
	github.com/fatih/color v1.14.1 // indirect
	github.com/fsnotify/fsnotify v1.6.0 // indirect
	github.com/go-errors/errors v1.4.2 // indirect
	github.com/go-logr/logr v1.2.3 // indirect
	github.com/go-openapi/jsonpointer v0.19.6 // indirect
	github.com/go-openapi/jsonreference v0.20.1 // indirect
	github.com/go-openapi/swag v0.22.3 // indirect
	github.com/gogo/protobuf v1.3.2 // indirect
	github.com/golang/protobuf v1.5.3 // indirect
	github.com/google/btree v1.0.1 // indirect
	github.com/google/gnostic v0.5.7-v3refs // indirect
	github.com/google/go-cmp v0.5.9 // indirect
	github.com/google/gofuzz v1.1.0 // indirect
	github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7 // indirect
	github.com/hashicorp/hcl v1.0.0 // indirect
	github.com/imdario/mergo v0.3.6 // indirect
	github.com/inconshreveable/mousetrap v1.1.0 // indirect
	github.com/josharian/intern v1.0.0 // indirect
	github.com/json-iterator/go v1.1.12 // indirect
	github.com/liggitt/tabwriter v0.0.0-20181228230101-89fcab3d43de // indirect
	github.com/magiconair/properties v1.8.7 // indirect
	github.com/mailru/easyjson v0.7.7 // indirect
	github.com/mattn/go-colorable v0.1.13 // indirect
	github.com/mattn/go-isatty v0.0.17 // indirect
	github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect
	github.com/mitchellh/go-wordwrap v1.0.0 // indirect
	github.com/mitchellh/mapstructure v1.5.0 // indirect
	github.com/moby/spdystream v0.2.0 // indirect
	github.com/moby/term v0.0.0-20221205130635-1aeaba878587 // indirect
	github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
	github.com/modern-go/reflect2 v1.0.2 // indirect
	github.com/monochromegane/go-gitignore v0.0.0-20200626010858-205db1a8cc00 // indirect
	github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
	github.com/pelletier/go-toml/v2 v2.0.8 // indirect
	github.com/peterbourgon/diskv v2.0.1+incompatible // indirect
	github.com/pkg/errors v0.9.1 // indirect
	github.com/pmezard/go-difflib v1.0.0 // indirect
	github.com/prometheus/client_model v0.4.0 // indirect
	github.com/prometheus/procfs v0.10.1 // indirect
	github.com/russross/blackfriday/v2 v2.1.0 // indirect
	github.com/spf13/afero v1.9.5 // indirect
	github.com/spf13/cast v1.5.1 // indirect
	github.com/spf13/jwalterweatherman v1.1.0 // indirect
	github.com/subosito/gotenv v1.4.2 // indirect
	github.com/xlab/treeprint v1.1.0 // indirect
	go.starlark.net v0.0.0-20200306205701-8dd3e2ee1dd5 // indirect
	golang.org/x/net v0.10.0 // indirect
	golang.org/x/oauth2 v0.8.0 // indirect
	golang.org/x/sys v0.8.0 // indirect
	golang.org/x/term v0.6.0 // indirect
	golang.org/x/text v0.9.0 // indirect
	golang.org/x/time v0.1.0 // indirect
	google.golang.org/appengine v1.6.7 // indirect
	google.golang.org/protobuf v1.30.0 // indirect
	gopkg.in/inf.v0 v0.9.1 // indirect
	gopkg.in/ini.v1 v1.67.0 // indirect
	gopkg.in/yaml.v2 v2.4.0 // indirect
	gopkg.in/yaml.v3 v3.0.1 // indirect
	k8s.io/cli-runtime v0.27.4 // indirect
	k8s.io/component-base v0.27.4 // indirect
	k8s.io/klog/v2 v2.90.1 // indirect
	k8s.io/kube-openapi v0.0.0-20230501164219-8b0f38b5fd1f // indirect
	k8s.io/utils v0.0.0-20230209194617-a36077c30491 // indirect
	sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect
	sigs.k8s.io/kustomize/api v0.13.2 // indirect
	sigs.k8s.io/kustomize/kyaml v0.14.1 // indirect
	sigs.k8s.io/structured-merge-diff/v4 v4.2.3 // indirect
	sigs.k8s.io/yaml v1.3.0 // indirect
)

BIN img/cncf-color.png: new executable file (24 KiB; binary not shown)
BIN img/logo.png: changed (previously 16 KiB; binary not shown)
BIN: one further image changed (previously 7.2 KiB; name not shown in this view)
@@ -1,32 +1,46 @@
apiVersion: extensions/v1beta1
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: kured
  namespace: kube-system
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: kured            # Must match `--ds-name`
  namespace: kube-system # Must match `--ds-namespace`
spec:
  selector:
    matchLabels:
      name: kured
  updateStrategy:
    type: RollingUpdate
  template:
    metadata:
      labels:
        name: kured
    spec:
      serviceAccountName: kured
      tolerations:
        - key: node-role.kubernetes.io/control-plane
          effect: NoSchedule
        - key: node-role.kubernetes.io/master
          effect: NoSchedule
      hostPID: true # Facilitate entering the host mount namespace via init
      restartPolicy: Always
      containers:
        - name: kured
          image: quay.io/weaveworks/kured
          # If you find yourself here wondering why there is no
          # :latest tag on Docker Hub, see the FAQ in the README
          image: ghcr.io/kubereboot/kured:1.14.0
          imagePullPolicy: IfNotPresent
          command:
            - /usr/bin/kured
          # args:
          # - --alert-filter-regexp=^RebootRequired$
          # - --ds-name=kured
          # - --ds-namespace=kube-system
          # - --lock-annotation=weave.works/kured-node-lock
          # - --period=1h
          # - --prometheus-url=http://prometheus.monitoring.svc.cluster.local
          # - --reboot-sentinel=/var/run/reboot-required
          # - --slack-hook-url=https://hooks.slack.com/...
          # - --slack-username=prod
          #
          # NO USER SERVICEABLE PARTS BEYOND THIS POINT
          securityContext:
            privileged: true # Give permission to nsenter /proc/1/ns/mnt
            readOnlyRootFilesystem: true
          ports:
            - containerPort: 8080
              name: metrics
          env:
            # Pass in the name of the node on which this pod is scheduled
            # for use with drain/uncordon operations and lock acquisition
@@ -34,14 +48,43 @@ spec:
            valueFrom:
              fieldRef:
                fieldPath: spec.nodeName
          volumeMounts:
            # Needed for two purposes:
            # * Testing for the existence of /var/run/reboot-required
            # * Accessing /var/run/dbus/system_bus_socket to effect reboot
            - name: hostrun
              mountPath: /var/run
      restartPolicy: Always
      volumes:
        - name: hostrun
          hostPath:
            path: /var/run
          command:
            - /usr/bin/kured
            # - --force-reboot=false
            # - --drain-grace-period=-1
            # - --skip-wait-for-delete-timeout=0
            # - --drain-timeout=0
            # - --drain-pod-selector=""
            # - --period=1h
            # - --ds-namespace=kube-system
            # - --ds-name=kured
            # - --lock-annotation=weave.works/kured-node-lock
            # - --lock-ttl=0
            # - --prometheus-url=http://prometheus.monitoring.svc.cluster.local
            # - --alert-filter-regexp=^RebootRequired$
            # - --alert-filter-match-only=false
            # - --alert-firing-only=false
            # - --reboot-sentinel=/var/run/reboot-required
            # - --prefer-no-schedule-taint=""
            # - --reboot-sentinel-command=""
            # - --slack-hook-url=https://hooks.slack.com/...
            # - --slack-username=prod
            # - --slack-channel=alerting
            # - --notify-url="" # See also shoutrrr url format
            # - --message-template-drain=Draining node %s
            # - --message-template-reboot=Rebooting node %s
            # - --message-template-uncordon=Node %s rebooted & uncordoned successfully!
            # - --blocking-pod-selector=runtime=long,cost=expensive
            # - --blocking-pod-selector=name=temperamental
            # - --blocking-pod-selector=...
            # - --reboot-days=sun,mon,tue,wed,thu,fri,sat
            # - --reboot-delay=90s
            # - --start-time=0:00
            # - --end-time=23:59:59
            # - --time-zone=UTC
            # - --annotate-nodes=false
            # - --lock-release-delay=30m
            # - --log-format=text
            # - --metrics-host=""
            # - --metrics-port=8080
            # - --concurrency=1


63
kured-rbac.yaml
Normal file
@@ -0,0 +1,63 @@
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: kured
rules:
  # Allow kured to read spec.unschedulable
  # Allow kubectl to drain/uncordon
  #
  # NB: These permissions are tightly coupled to the bundled version of kubectl; the ones below
  # match https://github.com/kubernetes/kubernetes/blob/v1.19.4/staging/src/k8s.io/kubectl/pkg/cmd/drain/drain.go
  #
  - apiGroups: [""]
    resources: ["nodes"]
    verbs: ["get", "patch"]
  - apiGroups: [""]
    resources: ["pods"]
    verbs: ["list", "delete", "get"]
  - apiGroups: ["apps"]
    resources: ["daemonsets"]
    verbs: ["get"]
  - apiGroups: [""]
    resources: ["pods/eviction"]
    verbs: ["create"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: kured
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: kured
subjects:
  - kind: ServiceAccount
    name: kured
    namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  namespace: kube-system
  name: kured
rules:
  # Allow kured to lock/unlock itself
  - apiGroups: ["apps"]
    resources: ["daemonsets"]
    resourceNames: ["kured"]
    verbs: ["update"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  namespace: kube-system
  name: kured
subjects:
  - kind: ServiceAccount
    namespace: kube-system
    name: kured
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: kured
@@ -7,21 +7,39 @@ import (
|
||||
"sort"
|
||||
"time"
|
||||
|
||||
"github.com/prometheus/client_golang/api/prometheus"
|
||||
papi "github.com/prometheus/client_golang/api"
|
||||
v1 "github.com/prometheus/client_golang/api/prometheus/v1"
|
||||
"github.com/prometheus/common/model"
|
||||
)
|
||||
|
||||
// Returns a list of names of active (e.g. pending or firing) alerts, filtered
|
||||
// by the supplied regexp.
|
||||
func PrometheusActiveAlerts(prometheusURL string, filter *regexp.Regexp) ([]string, error) {
|
||||
client, err := prometheus.New(prometheus.Config{Address: prometheusURL})
|
||||
// PromClient is a wrapper around the Prometheus Client interface and implements the api
|
||||
// This way, the PromClient can be instantiated with the configuration the Client needs, and
|
||||
// the ability to use the methods the api has, like Query and so on.
|
||||
type PromClient struct {
|
||||
papi papi.Client
|
||||
api v1.API
|
||||
}
|
||||
|
||||
// NewPromClient creates a new client to the Prometheus API.
|
||||
// It returns an error on any problem.
|
||||
func NewPromClient(conf papi.Config) (*PromClient, error) {
|
||||
promClient, err := papi.NewClient(conf)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
client := PromClient{papi: promClient, api: v1.NewAPI(promClient)}
|
||||
return &client, nil
|
||||
}
|
||||
|
||||
queryAPI := prometheus.NewQueryAPI(client)
|
||||
// ActiveAlerts is a method of type PromClient, it returns a list of names of active alerts
|
||||
// (e.g. pending or firing), filtered by the supplied regexp or by the includeLabels query.
|
||||
// filter by regexp means when the regex finds the alert-name; the alert is exluded from the
|
||||
// block-list and will NOT block rebooting. query by includeLabel means,
|
||||
// if the query finds an alert, it will include it to the block-list and it WILL block rebooting.
|
||||
func (p *PromClient) ActiveAlerts(filter *regexp.Regexp, firingOnly, filterMatchOnly bool) ([]string, error) {
|
||||
|
||||
value, err := queryAPI.Query(context.Background(), "ALERTS", time.Now())
|
||||
// get all alerts from prometheus
|
||||
value, _, err := p.api.Query(context.Background(), "ALERTS", time.Now())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -31,17 +49,17 @@ func PrometheusActiveAlerts(prometheusURL string, filter *regexp.Regexp) ([]stri
|
||||
activeAlertSet := make(map[string]bool)
|
||||
for _, sample := range vector {
|
||||
if alertName, isAlert := sample.Metric[model.AlertNameLabel]; isAlert && sample.Value != 0 {
|
||||
if filter == nil || !filter.MatchString(string(alertName)) {
|
||||
if matchesRegex(filter, string(alertName), filterMatchOnly) && (!firingOnly || sample.Metric["alertstate"] == "firing") {
|
||||
activeAlertSet[string(alertName)] = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var activeAlerts []string
|
||||
for activeAlert, _ := range activeAlertSet {
|
||||
for activeAlert := range activeAlertSet {
|
||||
activeAlerts = append(activeAlerts, activeAlert)
|
||||
}
|
||||
sort.Sort(sort.StringSlice(activeAlerts))
|
||||
sort.Strings(activeAlerts)
|
||||
|
||||
return activeAlerts, nil
|
||||
}
|
||||
@@ -49,3 +67,11 @@ func PrometheusActiveAlerts(prometheusURL string, filter *regexp.Regexp) ([]stri
|
||||
|
||||
return nil, fmt.Errorf("Unexpected value type: %v", value)
|
||||
}
|
||||
|
||||
func matchesRegex(filter *regexp.Regexp, alertName string, filterMatchOnly bool) bool {
|
||||
if filter == nil {
|
||||
return true
|
||||
}
|
||||
|
||||
return filter.MatchString(string(alertName)) == filterMatchOnly
|
||||
}
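To make the matchesRegex truth table concrete (a quick sketch against the function above):

	f := regexp.MustCompile("^RebootRequired$")
	matchesRegex(nil, "AnyAlert", false)     // true: no filter, every active alert blocks
	matchesRegex(f, "RebootRequired", false) // false: matching alerts are excluded and do not block
	matchesRegex(f, "SomethingElse", false)  // true: non-matching alerts still block
	matchesRegex(f, "RebootRequired", true)  // true: with filter-match-only, only matching alerts block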

166
pkg/alerts/prometheus_test.go
Normal file
@@ -0,0 +1,166 @@
package alerts

import (
	"log"
	"net/http"
	"net/http/httptest"

	"regexp"
	"testing"

	"github.com/prometheus/client_golang/api"

	"github.com/stretchr/testify/assert"
)

type MockResponse struct {
	StatusCode int
	Body       []byte
}

// MockServerProperties ties a mock response to a url and a method
type MockServerProperties struct {
	URI        string
	HTTPMethod string
	Response   MockResponse
}

// NewMockServer sets up a new MockServer with properties and starts the server.
func NewMockServer(props ...MockServerProperties) *httptest.Server {

	handler := http.HandlerFunc(
		func(w http.ResponseWriter, r *http.Request) {
			for _, proc := range props {
				_, err := w.Write(proc.Response.Body)
				if err != nil {
					log.Fatal(err)
				}
			}
		})
	return httptest.NewServer(handler)
}

func TestActiveAlerts(t *testing.T) {
	responsebody := `{"status":"success","data":{"resultType":"vector","result":[{"metric":{"__name__":"ALERTS","alertname":"GatekeeperViolations","alertstate":"firing","severity":"warning","team":"platform-infra"},"value":[1622472933.973,"1"]},{"metric":{"__name__":"ALERTS","alertname":"PodCrashing-dev","alertstate":"firing","container":"deployment","instance":"1.2.3.4:8080","job":"kube-state-metrics","namespace":"dev","pod":"dev-deployment-78dcbmf25v","severity":"critical","team":"dev"},"value":[1622472933.973,"1"]},{"metric":{"__name__":"ALERTS","alertname":"PodRestart-dev","alertstate":"firing","container":"deployment","instance":"1.2.3.4:1234","job":"kube-state-metrics","namespace":"qa","pod":"qa-job-deployment-78dcbmf25v","severity":"warning","team":"qa"},"value":[1622472933.973,"1"]},{"metric":{"__name__":"ALERTS","alertname":"PrometheusTargetDown","alertstate":"firing","job":"kubernetes-pods","severity":"warning","team":"platform-infra"},"value":[1622472933.973,"1"]},{"metric":{"__name__":"ALERTS","alertname":"ScheduledRebootFailing","alertstate":"pending","severity":"warning","team":"platform-infra"},"value":[1622472933.973,"1"]}]}}`
	addr := "http://localhost:10001"

	for _, tc := range []struct {
		it              string
		rFilter         string
		respBody        string
		aName           string
		wantN           int
		firingOnly      bool
		filterMatchOnly bool
	}{
		{
			it:              "should return no active alerts",
			respBody:        responsebody,
			rFilter:         "",
			wantN:           0,
			firingOnly:      false,
			filterMatchOnly: false,
		},
		{
			it:              "should return a subset of all alerts",
			respBody:        responsebody,
			rFilter:         "Pod",
			wantN:           3,
			firingOnly:      false,
			filterMatchOnly: false,
		},
		{
			it:              "should return a subset of all alerts",
			respBody:        responsebody,
			rFilter:         "Gatekeeper",
			wantN:           1,
			firingOnly:      false,
			filterMatchOnly: true,
		},
		{
			it:              "should return all active alerts by regex",
			respBody:        responsebody,
			rFilter:         "*",
			wantN:           5,
			firingOnly:      false,
			filterMatchOnly: false,
		},
		{
			it:              "should return all active alerts by regex filter",
			respBody:        responsebody,
			rFilter:         "*",
			wantN:           5,
			firingOnly:      false,
			filterMatchOnly: false,
		},
		{
			it:              "should return only firing alerts if firingOnly is true",
			respBody:        responsebody,
			rFilter:         "*",
			wantN:           4,
			firingOnly:      true,
			filterMatchOnly: false,
		},

		{
			it:              "should return ScheduledRebootFailing active alerts",
			respBody:        `{"status":"success","data":{"resultType":"vector","result":[{"metric":{"__name__":"ALERTS","alertname":"ScheduledRebootFailing","alertstate":"pending","severity":"warning","team":"platform-infra"},"value":[1622472933.973,"1"]}]}}`,
			aName:           "ScheduledRebootFailing",
			rFilter:         "*",
			wantN:           1,
			firingOnly:      false,
			filterMatchOnly: false,
		},
		{
			it:              "should not return an active alert if RebootRequired is firing (regex filter)",
			respBody:        `{"status":"success","data":{"resultType":"vector","result":[{"metric":{"__name__":"ALERTS","alertname":"RebootRequired","alertstate":"pending","severity":"warning","team":"platform-infra"},"value":[1622472933.973,"1"]}]}}`,
			rFilter:         "RebootRequired",
			wantN:           0,
			firingOnly:      false,
			filterMatchOnly: false,
		},
		{
			it:              "should not return an active alert if RebootRequired is firing (regex filter)",
			respBody:        `{"status":"success","data":{"resultType":"vector","result":[{"metric":{"__name__":"ALERTS","alertname":"RebootRequired","alertstate":"pending","severity":"warning","team":"platform-infra"},"value":[1622472933.973,"1"]}]}}`,
			rFilter:         "RebootRequired",
			wantN:           1,
			firingOnly:      false,
			filterMatchOnly: true,
		},
	} {
		// Start mockServer
		mockServer := NewMockServer(MockServerProperties{
			URI:        addr,
			HTTPMethod: http.MethodPost,
			Response: MockResponse{
				Body: []byte(tc.respBody),
			},
		})
		// Close mockServer after all connections are gone
		defer mockServer.Close()

		t.Run(tc.it, func(t *testing.T) {

			// regex filter
			regex, _ := regexp.Compile(tc.rFilter)

			// instantiate the prometheus client with the mockserver-address
			p, err := NewPromClient(api.Config{Address: mockServer.URL})
			if err != nil {
				log.Fatal(err)
			}

			result, err := p.ActiveAlerts(regex, tc.firingOnly, tc.filterMatchOnly)
			if err != nil {
				log.Fatal(err)
			}

			// assert
			assert.Equal(t, tc.wantN, len(result), "expected amount of alerts %v, got %v", tc.wantN, len(result))

			if tc.aName != "" {
				assert.Equal(t, tc.aName, result[0], "expected active alert %v, got %v", tc.aName, result[0])
			}
		})
	}
}

@@ -1,15 +1,25 @@
package daemonsetlock

import (
	"context"
	"encoding/json"
	"fmt"
	"time"

	v1 "k8s.io/api/apps/v1"
	"k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/client-go/kubernetes"
)

const (
	k8sAPICallRetrySleep   = 5 * time.Second // How much time to wait in between retrying a k8s API call
	k8sAPICallRetryTimeout = 5 * time.Minute // How long to wait until we determine that the k8s API is definitively unavailable
)

// DaemonSetLock holds all necessary information to do actions
// on the kured ds which holds lock info through annotations.
type DaemonSetLock struct {
	client *kubernetes.Clientset
	nodeID string
@@ -19,19 +29,28 @@ type DaemonSetLock struct {
}

type lockAnnotationValue struct {
	NodeID   string      `json:"nodeID"`
	Metadata interface{} `json:"metadata,omitempty"`
	NodeID   string        `json:"nodeID"`
	Metadata interface{}   `json:"metadata,omitempty"`
	Created  time.Time     `json:"created"`
	TTL      time.Duration `json:"TTL"`
}

type multiLockAnnotationValue struct {
	MaxOwners       int                   `json:"maxOwners"`
	LockAnnotations []lockAnnotationValue `json:"locks"`
}
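Serialized onto the daemonset, a multi-owner lock annotation therefore looks roughly like this (values invented for illustration; TTL is a Go time.Duration, so it marshals as nanoseconds):

	{"maxOwners": 2, "locks": [{"nodeID": "worker-1", "created": "2023-08-01T10:00:00Z", "TTL": 3600000000000}]}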

// New creates a daemonsetLock object containing the necessary data for follow up k8s requests
func New(client *kubernetes.Clientset, nodeID, namespace, name, annotation string) *DaemonSetLock {
	return &DaemonSetLock{client, nodeID, namespace, name, annotation}
}

func (dsl *DaemonSetLock) Acquire(metadata interface{}) (acquired bool, owner string, err error) {
// Acquire attempts to annotate the kured daemonset with lock info from instantiated DaemonSetLock using client-go
func (dsl *DaemonSetLock) Acquire(metadata interface{}, TTL time.Duration) (bool, string, error) {
	for {
		ds, err := dsl.client.ExtensionsV1beta1().DaemonSets(dsl.namespace).Get(dsl.name, metav1.GetOptions{})
		ds, err := dsl.GetDaemonSet(k8sAPICallRetrySleep, k8sAPICallRetryTimeout)
		if err != nil {
			return false, "", err
			return false, "", fmt.Errorf("timed out trying to get daemonset %s in namespace %s: %w", dsl.name, dsl.namespace, err)
		}

		valueString, exists := ds.ObjectMeta.Annotations[dsl.annotation]
@@ -40,20 +59,23 @@ func (dsl *DaemonSetLock) Acquire(metadata interface{}) (acquired bool, owner st
		if err := json.Unmarshal([]byte(valueString), &value); err != nil {
			return false, "", err
		}
		return value.NodeID == dsl.nodeID, value.NodeID, nil

		if !ttlExpired(value.Created, value.TTL) {
			return value.NodeID == dsl.nodeID, value.NodeID, nil
		}
	}

	if ds.ObjectMeta.Annotations == nil {
		ds.ObjectMeta.Annotations = make(map[string]string)
	}
	value := lockAnnotationValue{NodeID: dsl.nodeID, Metadata: metadata}
	value := lockAnnotationValue{NodeID: dsl.nodeID, Metadata: metadata, Created: time.Now().UTC(), TTL: TTL}
	valueBytes, err := json.Marshal(&value)
	if err != nil {
		return false, "", err
	}
	ds.ObjectMeta.Annotations[dsl.annotation] = string(valueBytes)

	_, err = dsl.client.ExtensionsV1beta1().DaemonSets(dsl.namespace).Update(ds)
	_, err = dsl.client.AppsV1().DaemonSets(dsl.namespace).Update(context.TODO(), ds, metav1.UpdateOptions{})
	if err != nil {
		if se, ok := err.(*errors.StatusError); ok && se.ErrStatus.Reason == metav1.StatusReasonConflict {
			// Something else updated the resource between us reading and writing - try again soon
@@ -67,10 +89,97 @@ func (dsl *DaemonSetLock) Acquire(metadata interface{}) (acquired bool, owner st
	}
}

func (dsl *DaemonSetLock) Test(metadata interface{}) (holding bool, err error) {
	ds, err := dsl.client.ExtensionsV1beta1().DaemonSets(dsl.namespace).Get(dsl.name, metav1.GetOptions{})
// AcquireMultiple creates and annotates the daemonset with a multiple owner lock
func (dsl *DaemonSetLock) AcquireMultiple(metadata interface{}, TTL time.Duration, maxOwners int) (bool, []string, error) {
	for {
		ds, err := dsl.GetDaemonSet(k8sAPICallRetrySleep, k8sAPICallRetryTimeout)
		if err != nil {
			return false, []string{}, fmt.Errorf("timed out trying to get daemonset %s in namespace %s: %w", dsl.name, dsl.namespace, err)
		}

		annotation := multiLockAnnotationValue{}
		valueString, exists := ds.ObjectMeta.Annotations[dsl.annotation]
		if exists {
			if err := json.Unmarshal([]byte(valueString), &annotation); err != nil {
				return false, []string{}, fmt.Errorf("error getting multi lock: %w", err)
			}
		}

		lockPossible, newAnnotation := dsl.canAcquireMultiple(annotation, metadata, TTL, maxOwners)
		if !lockPossible {
			return false, nodeIDsFromMultiLock(newAnnotation), nil
		}

		if ds.ObjectMeta.Annotations == nil {
			ds.ObjectMeta.Annotations = make(map[string]string)
		}
		newAnnotationBytes, err := json.Marshal(&newAnnotation)
		if err != nil {
			return false, []string{}, fmt.Errorf("error marshalling new annotation lock: %w", err)
		}
		ds.ObjectMeta.Annotations[dsl.annotation] = string(newAnnotationBytes)

		_, err = dsl.client.AppsV1().DaemonSets(dsl.namespace).Update(context.Background(), ds, metav1.UpdateOptions{})
		if err != nil {
			if se, ok := err.(*errors.StatusError); ok && se.ErrStatus.Reason == metav1.StatusReasonConflict {
				time.Sleep(time.Second)
				continue
			} else {
				return false, []string{}, fmt.Errorf("error updating daemonset with multi lock: %w", err)
			}
		}
		return true, nodeIDsFromMultiLock(newAnnotation), nil
	}
}

func nodeIDsFromMultiLock(annotation multiLockAnnotationValue) []string {
	nodeIDs := make([]string, 0, len(annotation.LockAnnotations))
	for _, nodeLock := range annotation.LockAnnotations {
		nodeIDs = append(nodeIDs, nodeLock.NodeID)
	}
	return nodeIDs
}

func (dsl *DaemonSetLock) canAcquireMultiple(annotation multiLockAnnotationValue, metadata interface{}, TTL time.Duration, maxOwners int) (bool, multiLockAnnotationValue) {
	newAnnotation := multiLockAnnotationValue{MaxOwners: maxOwners}
	freeSpace := false
	if annotation.LockAnnotations == nil || len(annotation.LockAnnotations) < maxOwners {
		freeSpace = true
		newAnnotation.LockAnnotations = annotation.LockAnnotations
	} else {
		for _, nodeLock := range annotation.LockAnnotations {
			if ttlExpired(nodeLock.Created, nodeLock.TTL) {
				freeSpace = true
				continue
			}
			newAnnotation.LockAnnotations = append(
				newAnnotation.LockAnnotations,
				nodeLock,
			)
		}
	}

	if freeSpace {
		newAnnotation.LockAnnotations = append(
			newAnnotation.LockAnnotations,
			lockAnnotationValue{
				NodeID:   dsl.nodeID,
				Metadata: metadata,
				Created:  time.Now().UTC(),
				TTL:      TTL,
			},
		)
		return true, newAnnotation
	}

	return false, multiLockAnnotationValue{}
}
|
||||
|
||||
// Test attempts to check the kured daemonset lock status (existence, expiry) from instantiated DaemonSetLock using client-go
|
||||
func (dsl *DaemonSetLock) Test(metadata interface{}) (bool, error) {
|
||||
ds, err := dsl.GetDaemonSet(k8sAPICallRetrySleep, k8sAPICallRetryTimeout)
|
||||
if err != nil {
|
||||
return false, err
|
||||
return false, fmt.Errorf("timed out trying to get daemonset %s in namespace %s: %w", dsl.name, dsl.namespace, err)
|
||||
}
|
||||
|
||||
valueString, exists := ds.ObjectMeta.Annotations[dsl.annotation]
|
||||
@@ -79,17 +188,45 @@ func (dsl *DaemonSetLock) Test(metadata interface{}) (holding bool, err error) {
|
||||
if err := json.Unmarshal([]byte(valueString), &value); err != nil {
|
||||
return false, err
|
||||
}
|
||||
return value.NodeID == dsl.nodeID, nil
|
||||
|
||||
if !ttlExpired(value.Created, value.TTL) {
|
||||
return value.NodeID == dsl.nodeID, nil
|
||||
}
|
||||
}
|
||||
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// TestMultiple attempts to check the kured daemonset lock status for multi locks
|
||||
func (dsl *DaemonSetLock) TestMultiple() (bool, error) {
|
||||
ds, err := dsl.GetDaemonSet(k8sAPICallRetrySleep, k8sAPICallRetryTimeout)
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("timed out trying to get daemonset %s in namespace %s: %w", dsl.name, dsl.namespace, err)
|
||||
}
|
||||
|
||||
valueString, exists := ds.ObjectMeta.Annotations[dsl.annotation]
|
||||
if exists {
|
||||
value := multiLockAnnotationValue{}
|
||||
if err := json.Unmarshal([]byte(valueString), &value); err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
for _, nodeLock := range value.LockAnnotations {
|
||||
if nodeLock.NodeID == dsl.nodeID && !ttlExpired(nodeLock.Created, nodeLock.TTL) {
|
||||
return true, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// Release attempts to remove the lock data from the kured ds annotations using client-go
|
||||
func (dsl *DaemonSetLock) Release() error {
|
||||
for {
|
||||
ds, err := dsl.client.ExtensionsV1beta1().DaemonSets(dsl.namespace).Get(dsl.name, metav1.GetOptions{})
|
||||
ds, err := dsl.GetDaemonSet(k8sAPICallRetrySleep, k8sAPICallRetryTimeout)
|
||||
if err != nil {
|
||||
return err
|
||||
return fmt.Errorf("timed out trying to get daemonset %s in namespace %s: %w", dsl.name, dsl.namespace, err)
|
||||
}
|
||||
|
||||
valueString, exists := ds.ObjectMeta.Annotations[dsl.annotation]
|
||||
@@ -98,6 +235,7 @@ func (dsl *DaemonSetLock) Release() error {
|
||||
if err := json.Unmarshal([]byte(valueString), &value); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if value.NodeID != dsl.nodeID {
|
||||
return fmt.Errorf("Not lock holder: %v", value.NodeID)
|
||||
}
|
||||
@@ -107,7 +245,7 @@ func (dsl *DaemonSetLock) Release() error {
|
||||
|
||||
delete(ds.ObjectMeta.Annotations, dsl.annotation)
|
||||
|
||||
_, err = dsl.client.ExtensionsV1beta1().DaemonSets(dsl.namespace).Update(ds)
|
||||
_, err = dsl.client.AppsV1().DaemonSets(dsl.namespace).Update(context.TODO(), ds, metav1.UpdateOptions{})
|
||||
if err != nil {
|
||||
if se, ok := err.(*errors.StatusError); ok && se.ErrStatus.Reason == metav1.StatusReasonConflict {
|
||||
// Something else updated the resource between us reading and writing - try again soon
|
||||
@@ -120,3 +258,77 @@ func (dsl *DaemonSetLock) Release() error {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// ReleaseMultiple attempts to remove the lock data from the kured ds annotations using client-go
|
||||
func (dsl *DaemonSetLock) ReleaseMultiple() error {
|
||||
for {
|
||||
ds, err := dsl.GetDaemonSet(k8sAPICallRetrySleep, k8sAPICallRetryTimeout)
|
||||
if err != nil {
|
||||
return fmt.Errorf("timed out trying to get daemonset %s in namespace %s: %w", dsl.name, dsl.namespace, err)
|
||||
}
|
||||
|
||||
valueString, exists := ds.ObjectMeta.Annotations[dsl.annotation]
|
||||
modified := false
|
||||
value := multiLockAnnotationValue{}
|
||||
if exists {
|
||||
if err := json.Unmarshal([]byte(valueString), &value); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for idx, nodeLock := range value.LockAnnotations {
|
||||
if nodeLock.NodeID == dsl.nodeID {
|
||||
value.LockAnnotations = append(value.LockAnnotations[:idx], value.LockAnnotations[idx+1:]...)
|
||||
modified = true
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !exists || !modified {
|
||||
return fmt.Errorf("Lock not held")
|
||||
}
|
||||
|
||||
newAnnotationBytes, err := json.Marshal(value)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error marshalling new annotation on release: %v", err)
|
||||
}
|
||||
ds.ObjectMeta.Annotations[dsl.annotation] = string(newAnnotationBytes)
|
||||
|
||||
_, err = dsl.client.AppsV1().DaemonSets(dsl.namespace).Update(context.TODO(), ds, metav1.UpdateOptions{})
|
||||
if err != nil {
|
||||
if se, ok := err.(*errors.StatusError); ok && se.ErrStatus.Reason == metav1.StatusReasonConflict {
|
||||
// Something else updated the resource between us reading and writing - try again soon
|
||||
time.Sleep(time.Second)
|
||||
continue
|
||||
} else {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// GetDaemonSet returns the named DaemonSet resource from the DaemonSetLock's configured client
|
||||
func (dsl *DaemonSetLock) GetDaemonSet(sleep, timeout time.Duration) (*v1.DaemonSet, error) {
|
||||
var ds *v1.DaemonSet
|
||||
var lastError error
|
||||
err := wait.PollImmediate(sleep, timeout, func() (bool, error) {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), timeout)
|
||||
defer cancel()
|
||||
if ds, lastError = dsl.client.AppsV1().DaemonSets(dsl.namespace).Get(ctx, dsl.name, metav1.GetOptions{}); lastError != nil {
|
||||
return false, nil
|
||||
}
|
||||
return true, nil
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("Timed out trying to get daemonset %s in namespace %s: %v", dsl.name, dsl.namespace, lastError)
|
||||
}
|
||||
return ds, nil
|
||||
}
|
||||
|
||||
func ttlExpired(created time.Time, ttl time.Duration) bool {
|
||||
if ttl > 0 && time.Since(created) >= ttl {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
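For orientation, a minimal sketch of how this lock API fits together (not part of the commit; the in-cluster client setup, the daemonset name/namespace, and the annotation key are illustrative assumptions):

package main

import (
	"fmt"
	"time"

	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/rest"

	"github.com/kubereboot/kured/pkg/daemonsetlock" // assumed import path
)

func main() {
	// Assumes the process runs in-cluster, as the kured daemonset does.
	config, err := rest.InClusterConfig()
	if err != nil {
		panic(err)
	}
	client := kubernetes.NewForConfigOrDie(config)

	// Daemonset coordinates and annotation key are illustrative values.
	lock := daemonsetlock.New(client, "node-1", "kube-system", "kured", "weave.works/kured-node-lock")

	// With the TTL parameter added above, the lock self-expires even if the
	// holder never calls Release (e.g. the node dies mid-reboot).
	acquired, holder, err := lock.Acquire(nil, time.Hour)
	if err != nil {
		panic(err)
	}
	if !acquired {
		fmt.Printf("lock currently held by %s\n", holder)
		return
	}
	defer lock.Release()
	// ... drain and reboot while holding the lock ...
}

AcquireMultiple/TestMultiple/ReleaseMultiple follow the same pattern but track a list of holders, so up to maxOwners nodes may hold the lock concurrently.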
208 pkg/daemonsetlock/daemonsetlock_test.go Normal file
@@ -0,0 +1,208 @@
package daemonsetlock

import (
	"reflect"
	"sort"
	"testing"
	"time"
)

func TestTtlExpired(t *testing.T) {
	d := time.Date(2020, 05, 05, 14, 15, 0, 0, time.UTC)
	second, _ := time.ParseDuration("1s")
	zero, _ := time.ParseDuration("0m")

	tests := []struct {
		created time.Time
		ttl     time.Duration
		result  bool
	}{
		{d, second, true},
		{time.Now(), second, false},
		{d, zero, false},
	}

	for i, tst := range tests {
		if ttlExpired(tst.created, tst.ttl) != tst.result {
			t.Errorf("Test %d failed, expected %v but got %v", i, tst.result, !tst.result)
		}
	}
}

func multiLockAnnotationsAreEqualByNodes(src, dst multiLockAnnotationValue) bool {
	srcNodes := []string{}
	for _, srcLock := range src.LockAnnotations {
		srcNodes = append(srcNodes, srcLock.NodeID)
	}
	sort.Strings(srcNodes)

	dstNodes := []string{}
	for _, dstLock := range dst.LockAnnotations {
		dstNodes = append(dstNodes, dstLock.NodeID)
	}
	sort.Strings(dstNodes)

	return reflect.DeepEqual(srcNodes, dstNodes)
}

func TestCanAcquireMultiple(t *testing.T) {
	node1Name := "n1"
	node2Name := "n2"
	node3Name := "n3"
	testCases := []struct {
		name          string
		daemonSetLock DaemonSetLock
		maxOwners     int
		current       multiLockAnnotationValue
		desired       multiLockAnnotationValue
		lockPossible  bool
	}{
		{
			name: "empty_lock",
			daemonSetLock: DaemonSetLock{
				nodeID: node1Name,
			},
			maxOwners: 2,
			current:   multiLockAnnotationValue{},
			desired: multiLockAnnotationValue{
				MaxOwners: 2,
				LockAnnotations: []lockAnnotationValue{
					{NodeID: node1Name},
				},
			},
			lockPossible: true,
		},
		{
			name: "partial_lock",
			daemonSetLock: DaemonSetLock{
				nodeID: node1Name,
			},
			maxOwners: 2,
			current: multiLockAnnotationValue{
				MaxOwners: 2,
				LockAnnotations: []lockAnnotationValue{
					{NodeID: node2Name},
				},
			},
			desired: multiLockAnnotationValue{
				MaxOwners: 2,
				LockAnnotations: []lockAnnotationValue{
					{NodeID: node1Name},
					{NodeID: node2Name},
				},
			},
			lockPossible: true,
		},
		{
			name: "full_lock",
			daemonSetLock: DaemonSetLock{
				nodeID: node1Name,
			},
			maxOwners: 2,
			current: multiLockAnnotationValue{
				MaxOwners: 2,
				LockAnnotations: []lockAnnotationValue{
					{
						NodeID:  node2Name,
						Created: time.Now().UTC().Add(-1 * time.Minute),
						TTL:     time.Hour,
					},
					{
						NodeID:  node3Name,
						Created: time.Now().UTC().Add(-1 * time.Minute),
						TTL:     time.Hour,
					},
				},
			},
			desired: multiLockAnnotationValue{
				MaxOwners: 2,
				LockAnnotations: []lockAnnotationValue{
					{NodeID: node2Name},
					{NodeID: node3Name},
				},
			},
			lockPossible: false,
		},
		{
			name: "full_with_one_expired_lock",
			daemonSetLock: DaemonSetLock{
				nodeID: node1Name,
			},
			maxOwners: 2,
			current: multiLockAnnotationValue{
				MaxOwners: 2,
				LockAnnotations: []lockAnnotationValue{
					{
						NodeID:  node2Name,
						Created: time.Now().UTC().Add(-1 * time.Hour),
						TTL:     time.Minute,
					},
					{
						NodeID:  node3Name,
						Created: time.Now().UTC().Add(-1 * time.Minute),
						TTL:     time.Hour,
					},
				},
			},
			desired: multiLockAnnotationValue{
				MaxOwners: 2,
				LockAnnotations: []lockAnnotationValue{
					{NodeID: node1Name},
					{NodeID: node3Name},
				},
			},
			lockPossible: true,
		},
		{
			name: "full_with_all_expired_locks",
			daemonSetLock: DaemonSetLock{
				nodeID: node1Name,
			},
			maxOwners: 2,
			current: multiLockAnnotationValue{
				MaxOwners: 2,
				LockAnnotations: []lockAnnotationValue{
					{
						NodeID:  node2Name,
						Created: time.Now().UTC().Add(-1 * time.Hour),
						TTL:     time.Minute,
					},
					{
						NodeID:  node3Name,
						Created: time.Now().UTC().Add(-1 * time.Hour),
						TTL:     time.Minute,
					},
				},
			},
			desired: multiLockAnnotationValue{
				MaxOwners: 2,
				LockAnnotations: []lockAnnotationValue{
					{NodeID: node1Name},
				},
			},
			lockPossible: true,
		},
	}

	for _, testCase := range testCases {
		t.Run(testCase.name, func(t *testing.T) {
			lockPossible, actual := testCase.daemonSetLock.canAcquireMultiple(testCase.current, struct{}{}, time.Minute, testCase.maxOwners)
			if lockPossible != testCase.lockPossible {
				t.Fatalf(
					"unexpected result for lock possible (got %t expected %t new annotation %v",
					lockPossible,
					testCase.lockPossible,
					actual,
				)
			}

			if lockPossible && (!multiLockAnnotationsAreEqualByNodes(actual, testCase.desired) || testCase.desired.MaxOwners != actual.MaxOwners) {
				t.Fatalf(
					"expected lock %v but got %v",
					testCase.desired,
					actual,
				)
			}
		})
	}
}
@@ -5,7 +5,7 @@ import (
	"time"
)

-// Tick regularly after an initial delay randomly distributed between d/2 and d + d/2
+// New ticks regularly after an initial delay randomly distributed between d/2 and d + d/2
func New(s rand.Source, d time.Duration) <-chan time.Time {
	c := make(chan time.Time)
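A minimal sketch of how this jittered ticker is typically consumed (import path assumed): the first tick fires somewhere between d/2 and d + d/2, which spreads the periodic checks of many nodes apart instead of firing them all at once.

package main

import (
	"fmt"
	"math/rand"
	"time"

	"github.com/kubereboot/kured/pkg/delaytick" // assumed import path
)

func main() {
	// With d = time.Hour the first tick arrives after 30-90 minutes.
	ticker := delaytick.New(rand.NewSource(time.Now().UnixNano()), time.Hour)
	for t := range ticker {
		fmt.Println("checking for reboot sentinel at", t)
	}
}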
@@ -1,42 +0,0 @@
package slack

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
	"time"
)

var (
	httpClient = &http.Client{Timeout: 5 * time.Second}
)

type body struct {
	Text     string `json:"text,omitempty"`
	Username string `json:"username,omitempty"`
}

func NotifyReboot(hookURL, username, nodeID string) error {
	msg := body{
		Text:     fmt.Sprintf("Rebooting node %s", nodeID),
		Username: username,
	}

	var buf bytes.Buffer
	if err := json.NewEncoder(&buf).Encode(&msg); err != nil {
		return err
	}

	resp, err := httpClient.Post(hookURL, "application/json", &buf)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		return fmt.Errorf(resp.Status)
	}

	return nil
}
166 pkg/taints/taints.go Normal file
@@ -0,0 +1,166 @@
package taints

import (
	"context"
	"encoding/json"
	"fmt"

	log "github.com/sirupsen/logrus"
	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/client-go/kubernetes"
)

// Taint allows setting soft and hard limitations for scheduling and executing pods on nodes.
type Taint struct {
	client    *kubernetes.Clientset
	nodeID    string
	taintName string
	effect    v1.TaintEffect
	exists    bool
}

// New provides a new taint.
func New(client *kubernetes.Clientset, nodeID, taintName string, effect v1.TaintEffect) *Taint {
	exists, _, _ := taintExists(client, nodeID, taintName)

	return &Taint{
		client:    client,
		nodeID:    nodeID,
		taintName: taintName,
		effect:    effect,
		exists:    exists,
	}
}

// Enable creates the taint for a node. Creating an existing taint is a noop.
func (t *Taint) Enable() {
	if t.taintName == "" {
		return
	}

	if t.exists {
		return
	}

	preferNoSchedule(t.client, t.nodeID, t.taintName, t.effect, true)

	t.exists = true
}

// Disable removes the taint for a node. Removing a missing taint is a noop.
func (t *Taint) Disable() {
	if t.taintName == "" {
		return
	}

	if !t.exists {
		return
	}

	preferNoSchedule(t.client, t.nodeID, t.taintName, t.effect, false)

	t.exists = false
}

func taintExists(client *kubernetes.Clientset, nodeID, taintName string) (bool, int, *v1.Node) {
	updatedNode, err := client.CoreV1().Nodes().Get(context.TODO(), nodeID, metav1.GetOptions{})
	if err != nil || updatedNode == nil {
		log.Fatalf("Error reading node %s: %v", nodeID, err)
	}

	for i, taint := range updatedNode.Spec.Taints {
		if taint.Key == taintName {
			return true, i, updatedNode
		}
	}

	return false, 0, updatedNode
}

func preferNoSchedule(client *kubernetes.Clientset, nodeID, taintName string, effect v1.TaintEffect, shouldExists bool) {
	taintExists, offset, updatedNode := taintExists(client, nodeID, taintName)

	if taintExists && shouldExists {
		log.Debugf("Taint %v exists already for node %v.", taintName, nodeID)
		return
	}

	if !taintExists && !shouldExists {
		log.Debugf("Taint %v already missing for node %v.", taintName, nodeID)
		return
	}

	type patchTaints struct {
		Op    string      `json:"op"`
		Path  string      `json:"path"`
		Value interface{} `json:"value,omitempty"`
	}

	taint := v1.Taint{
		Key:    taintName,
		Effect: effect,
	}

	var patches []patchTaints

	if len(updatedNode.Spec.Taints) == 0 {
		// add first taint and ensure to keep current taints
		patches = []patchTaints{
			{
				Op:    "test",
				Path:  "/spec",
				Value: updatedNode.Spec,
			},
			{
				Op:    "add",
				Path:  "/spec/taints",
				Value: []v1.Taint{},
			},
			{
				Op:    "add",
				Path:  "/spec/taints/-",
				Value: taint,
			},
		}
	} else if taintExists {
		// remove taint and ensure to test against race conditions
		patches = []patchTaints{
			{
				Op:    "test",
				Path:  fmt.Sprintf("/spec/taints/%d", offset),
				Value: taint,
			},
			{
				Op:   "remove",
				Path: fmt.Sprintf("/spec/taints/%d", offset),
			},
		}
	} else {
		// add missing taint to existing list
		patches = []patchTaints{
			{
				Op:    "add",
				Path:  "/spec/taints/-",
				Value: taint,
			},
		}
	}

	patchBytes, err := json.Marshal(patches)
	if err != nil {
		log.Fatalf("Error encoding taint patch for node %s: %v", nodeID, err)
	}

	_, err = client.CoreV1().Nodes().Patch(context.TODO(), nodeID, types.JSONPatchType, patchBytes, metav1.PatchOptions{})
	if err != nil {
		log.Fatalf("Error patching taint for node %s: %v", nodeID, err)
	}

	if shouldExists {
		log.Info("Node taint added")
	} else {
		log.Info("Node taint removed")
	}
}
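A minimal sketch of the intended call pattern (not part of the commit; the taint key is hypothetical, and PreferNoSchedule mirrors the soft effect this package is built around):

package main

import (
	v1 "k8s.io/api/core/v1"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/rest"

	"github.com/kubereboot/kured/pkg/taints" // assumed import path
)

func main() {
	config, err := rest.InClusterConfig()
	if err != nil {
		panic(err)
	}
	client := kubernetes.NewForConfigOrDie(config)

	// "example.com/kured-reboot-in-progress" is a hypothetical taint key.
	taint := taints.New(client, "node-1", "example.com/kured-reboot-in-progress", v1.TaintEffectPreferNoSchedule)

	taint.Enable()        // hint the scheduler away from this node during the reboot window
	defer taint.Disable() // drop the taint once the node is back
}

Note the JSON-patch "test" operations in preferNoSchedule above: they make the add/remove safe against concurrent taint edits, since the patch fails instead of clobbering another writer's change.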
91 pkg/timewindow/days.go Normal file
@@ -0,0 +1,91 @@
package timewindow

import (
	"fmt"
	"strconv"
	"strings"
	"time"
)

// EveryDay contains all days of the week and is exported
// for convenient use in the command line arguments.
var EveryDay = []string{"su", "mo", "tu", "we", "th", "fr", "sa"}

// dayStrings maps day strings to time.Weekdays
var dayStrings = map[string]time.Weekday{
	"su":        time.Sunday,
	"sun":       time.Sunday,
	"sunday":    time.Sunday,
	"mo":        time.Monday,
	"mon":       time.Monday,
	"monday":    time.Monday,
	"tu":        time.Tuesday,
	"tue":       time.Tuesday,
	"tuesday":   time.Tuesday,
	"we":        time.Wednesday,
	"wed":       time.Wednesday,
	"wednesday": time.Wednesday,
	"th":        time.Thursday,
	"thu":       time.Thursday,
	"thursday":  time.Thursday,
	"fr":        time.Friday,
	"fri":       time.Friday,
	"friday":    time.Friday,
	"sa":        time.Saturday,
	"sat":       time.Saturday,
	"saturday":  time.Saturday,
}

type weekdays uint32

// parseWeekdays creates a set of weekdays from a string slice
func parseWeekdays(days []string) (weekdays, error) {
	var result uint32
	for _, day := range days {
		if len(day) == 0 {
			continue
		}

		weekday, err := parseWeekday(day)
		if err != nil {
			return weekdays(0), err
		}

		result |= 1 << uint32(weekday)
	}

	return weekdays(result), nil
}

// Contains returns true if the specified weekday is a member of this set.
func (w weekdays) Contains(day time.Weekday) bool {
	return uint32(w)&(1<<uint32(day)) != 0
}

// String returns a string representation of the set of weekdays.
func (w weekdays) String() string {
	var b strings.Builder
	for i := uint32(0); i < 7; i++ {
		if uint32(w)&(1<<i) != 0 {
			b.WriteString(time.Weekday(i).String()[0:3])
		} else {
			b.WriteString("---")
		}
	}

	return b.String()
}

func parseWeekday(day string) (time.Weekday, error) {
	if n, err := strconv.Atoi(day); err == nil {
		if n >= 0 && n < 7 {
			return time.Weekday(n), nil
		}
		return time.Sunday, fmt.Errorf("Invalid weekday, number out of range: %s", day)
	}

	if weekday, ok := dayStrings[strings.ToLower(day)]; ok {
		return weekday, nil
	}
	return time.Sunday, fmt.Errorf("Invalid weekday: %s", day)
}
46 pkg/timewindow/days_test.go Normal file
@@ -0,0 +1,46 @@
package timewindow

import (
	"strings"
	"testing"
)

func TestParseWeekdays(t *testing.T) {
	tests := []struct {
		input  string
		result string
	}{
		{"0,4", "Sun---------Thu------"},
		{"su,mo,tu", "SunMonTue------------"},
		{"sunday,tu,thu", "Sun---Tue---Thu------"},
		{"THURSDAY", "------------Thu------"},
		{"we,WED,WeDnEsDaY", "---------Wed---------"},
		{"", "---------------------"},
		{",,,", "---------------------"},
	}

	for _, tst := range tests {
		res, err := parseWeekdays(strings.Split(tst.input, ","))
		if err != nil {
			t.Errorf("Received error for input %s: %v", tst.input, err)
		} else if res.String() != tst.result {
			t.Errorf("Test %s: Expected %s got %s", tst.input, tst.result, res.String())
		}
	}
}

func TestParseWeekdaysErrors(t *testing.T) {
	tests := []string{
		"15",
		"-8",
		"8",
		"mon,tue,wed,fridayyyy",
	}

	for _, tst := range tests {
		_, err := parseWeekdays(strings.Split(tst, ","))
		if err == nil {
			t.Errorf("Expected to receive error for input %s", tst)
		}
	}
}
81 pkg/timewindow/timewindow.go Normal file
@@ -0,0 +1,81 @@
package timewindow

import (
	"fmt"
	"time"
)

// TimeWindow specifies a schedule of days and times.
type TimeWindow struct {
	days      weekdays
	location  *time.Location
	startTime time.Time
	endTime   time.Time
}

// New creates a TimeWindow instance based on string inputs specifying a schedule.
func New(days []string, startTime, endTime, location string) (*TimeWindow, error) {
	tw := &TimeWindow{}

	var err error
	if tw.days, err = parseWeekdays(days); err != nil {
		return nil, err
	}

	if tw.location, err = time.LoadLocation(location); err != nil {
		return nil, err
	}

	if tw.startTime, err = parseTime(startTime, tw.location); err != nil {
		return nil, err
	}

	if tw.endTime, err = parseTime(endTime, tw.location); err != nil {
		return nil, err
	}

	return tw, nil
}

// Contains determines whether the specified time is within this time window.
func (tw *TimeWindow) Contains(t time.Time) bool {
	loctime := t.In(tw.location)
	if !tw.days.Contains(loctime.Weekday()) {
		return false
	}

	start := time.Date(loctime.Year(), loctime.Month(), loctime.Day(), tw.startTime.Hour(), tw.startTime.Minute(), tw.startTime.Second(), 0, tw.location)
	end := time.Date(loctime.Year(), loctime.Month(), loctime.Day(), tw.endTime.Hour(), tw.endTime.Minute(), tw.endTime.Second(), 1e9-1, tw.location)

	// Time wrap validation: if the start time is after the end time, the
	// window wraps around midnight. To decide which way to wrap, we check
	// loctime: if it is before the end time, we wrap the start back to the
	// previous day; otherwise we wrap the end forward to the next day.
	if tw.startTime.After(tw.endTime) {
		if loctime.Before(end) {
			start = start.Add(-24 * time.Hour)
		} else {
			end = end.Add(24 * time.Hour)
		}
	}

	return (loctime.After(start) || loctime.Equal(start)) && (loctime.Before(end) || loctime.Equal(end))
}

// String returns a string representation of this time window.
func (tw *TimeWindow) String() string {
	return fmt.Sprintf("%s between %02d:%02d and %02d:%02d %s", tw.days.String(), tw.startTime.Hour(), tw.startTime.Minute(), tw.endTime.Hour(), tw.endTime.Minute(), tw.location.String())
}

// parseTime tries to parse a time with several formats.
func parseTime(s string, loc *time.Location) (time.Time, error) {
	fmts := []string{"15:04", "15:04:05", "03:04pm", "15", "03pm", "3pm"}
	for _, f := range fmts {
		if t, err := time.ParseInLocation(f, s, loc); err == nil {
			return t, nil
		}
	}

	return time.Now(), fmt.Errorf("Invalid time format: %s", s)
}
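To make the wrap-around rule concrete, a small sketch (assumed import path) of a window that crosses midnight:

package main

import (
	"fmt"
	"time"

	"github.com/kubereboot/kured/pkg/timewindow" // assumed import path
)

func main() {
	// Mon-Fri 9pm-5am UTC: the window straddles midnight, so early Tuesday
	// belongs to the window that opened on Monday evening.
	tw, err := timewindow.New([]string{"mo", "tu", "we", "th", "fr"}, "9pm", "5am", "UTC")
	if err != nil {
		panic(err)
	}

	tuesday := time.Date(2019, 4, 2, 2, 0, 0, 0, time.UTC) // Tue 02:00
	fmt.Println(tw.Contains(tuesday))                      // true: start wraps back to Mon 21:00
}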
97 pkg/timewindow/timewindow_test.go Normal file
@@ -0,0 +1,97 @@
package timewindow

import (
	"strings"
	"testing"
	"time"
)

func TestTimeWindows(t *testing.T) {
	type testcase struct {
		time   string
		result bool
	}

	tests := []struct {
		days  string
		start string
		end   string
		loc   string
		cases []testcase
	}{
		{"mon,tue,wed,thu,fri", "9am", "5pm", "America/Los_Angeles", []testcase{
			{"2019/03/31 10:00 PDT", false},
			{"2019/04/04 00:49 PDT", false},
			{"2019/04/04 12:00 PDT", true},
			{"2019/04/04 11:59 UTC", false},
			{"2019/04/05 08:59 PDT", false},
			{"2019/04/05 9:01 PDT", true},
		}},
		{"mon,we,fri", "10:01", "11:30am", "America/Los_Angeles", []testcase{
			{"2019/04/05 10:30 PDT", true},
			{"2019/04/06 10:30 PDT", false},
			{"2019/04/07 10:30 PDT", false},
			{"2019/04/08 10:30 PDT", true},
			{"2019/04/09 10:30 PDT", false},
			{"2019/04/10 10:30 PDT", true},
			{"2019/04/11 10:30 PDT", false},
		}},
		{"mo,tu,we,th,fr", "00:00", "23:59:59", "UTC", []testcase{
			{"2019/04/18 00:00 UTC", true},
			{"2019/04/18 23:59 UTC", true},
		}},
		{"mon,tue,wed,thu,fri", "9pm", "5am", "America/Los_Angeles", []testcase{
			{"2019/03/30 04:00 PDT", false},
			{"2019/03/31 10:00 PDT", false},
			{"2019/03/31 22:00 PDT", false},
			{"2019/04/04 00:49 PDT", true},
			{"2019/04/04 12:00 PDT", false},
			{"2019/04/04 22:49 PDT", true},
			{"2019/04/05 00:49 PDT", true},
			{"2019/04/05 08:59 PDT", false},
			{"2019/04/05 9:01 PDT", false},
		}},
		{"mon,tue,wed,thu,fri", "11:59pm", "00:01am", "America/Los_Angeles", []testcase{
			{"2019/04/04 23:58 PDT", false},
			{"2019/04/04 23:59 PDT", true},
			{"2019/04/05 00:00 PDT", true},
			{"2019/04/05 00:01 PDT", true},
			{"2019/04/05 00:02 PDT", false},
		}},
		{"mon,tue,wed,fri", "11:59pm", "00:01am", "America/Los_Angeles", []testcase{
			{"2019/04/04 23:58 PDT", false},
			{"2019/04/04 23:59 PDT", false}, // Even though this falls between the hours, Thursday is not included, so it should not run
			{"2019/04/05 00:00 PDT", true},
			{"2019/04/05 00:02 PDT", false},
		}},
		{"mon,tue,wed,thu", "11:59pm", "00:01am", "America/Los_Angeles", []testcase{
			{"2019/04/04 23:58 PDT", false},
			{"2019/04/04 23:59 PDT", true},
			{"2019/04/05 00:00 PDT", false}, // Even though this falls between the hours, Friday is not included, so it should not run
			{"2019/04/05 00:02 PDT", false},
		}},
		{"mon,tue,wed,thu,fri", "11:59pm", "00:01am", "UTC", []testcase{
			{"2019/04/04 23:58 UTC", false},
			{"2019/04/04 23:59 UTC", true},
			{"2019/04/05 00:00 UTC", true},
			{"2019/04/05 00:01 UTC", true},
			{"2019/04/05 00:02 UTC", false},
		}},
	}

	for i, tst := range tests {
		tw, err := New(strings.Split(tst.days, ","), tst.start, tst.end, tst.loc)
		if err != nil {
			t.Errorf("Test [%d] failed to create TimeWindow: %v", i, err)
		}

		for _, cas := range tst.cases {
			tm, err := time.ParseInLocation("2006/01/02 15:04 MST", cas.time, tw.location)
			if err != nil {
				t.Errorf("Failed to parse time \"%s\": %v", cas.time, err)
			} else if cas.result != tw.Contains(tm) {
				t.Errorf("(%s) contains (%s) didn't match expected result of %v", tw.String(), cas.time, cas.result)
			}
		}
	}
}
12 tests/kind/create-reboot-sentinels.sh Executable file
@@ -0,0 +1,12 @@
#!/usr/bin/env bash

# USE KUBECTL_CMD to pass context and/or namespaces.
KUBECTL_CMD="${KUBECTL_CMD:-kubectl}"
SENTINEL_FILE="${SENTINEL_FILE:-/var/run/reboot-required}"

echo "Creating reboot sentinel on all nodes"

for nodename in $("$KUBECTL_CMD" get nodes -o name); do
    docker exec "${nodename/node\//}" hostname
    docker exec "${nodename/node\//}" touch "${SENTINEL_FILE}"
done
91 tests/kind/follow-coordinated-reboot.sh Executable file
@@ -0,0 +1,91 @@
#!/usr/bin/env bash

NODECOUNT=${NODECOUNT:-5}
KUBECTL_CMD="${KUBECTL_CMD:-kubectl}"
DEBUG="${DEBUG:-false}"
CONTAINER_NAME_FORMAT=${CONTAINER_NAME_FORMAT:-"chart-testing-*"}

tmp_dir=$(mktemp -d -t kured-XXXX)
function gather_logs_and_cleanup {
    if [[ -f "$tmp_dir"/node_output ]]; then
        rm "$tmp_dir"/node_output
    fi
    rmdir "$tmp_dir"

    # The next commands are useful regardless of success or failures.
    if [[ "$DEBUG" == "true" ]]; then
        echo "############################################################"
        # This is useful to see if containers have crashed.
        echo "docker ps -a:"
        docker ps -a
        echo "docker journal logs"
        journalctl -u docker --no-pager

        # This is useful to see if the nodes have _properly_ rebooted.
        # It should show the reboot/two container starts per node.
        for name in $(docker ps -a -f "name=${CONTAINER_NAME_FORMAT}" -q); do
            echo "############################################################"
            echo "docker logs for container $name:"
            docker logs "$name"
        done

    fi
}
trap gather_logs_and_cleanup EXIT

declare -A was_unschedulable
declare -A has_recovered
max_attempts="60"
sleep_time=60
attempt_num=1

set +o errexit
echo "There are $NODECOUNT nodes in the cluster"
until [ ${#was_unschedulable[@]} == "$NODECOUNT" ] && [ ${#has_recovered[@]} == "$NODECOUNT" ]
do
    echo "${#was_unschedulable[@]} nodes were removed from pool once:" "${!was_unschedulable[@]}"
    echo "${#has_recovered[@]} nodes removed from the pool are now back:" "${!has_recovered[@]}"

    #"$KUBECTL_CMD" logs -n kube-system -l name=kured --ignore-errors > "$tmp_dir"/node_output
    #if [[ "$DEBUG" == "true" ]]; then
    #    echo "Kured pod logs:"
    #    cat "$tmp_dir"/node_output
    #fi

    "$KUBECTL_CMD" get nodes -o custom-columns=NAME:.metadata.name,SCHEDULABLE:.spec.unschedulable --no-headers > "$tmp_dir"/node_output
    if [[ "$DEBUG" == "true" ]]; then
        # This is useful to see if a node gets stuck after drain, and doesn't
        # come back up.
        echo "Result of command $KUBECTL_CMD get nodes ... showing unschedulable nodes:"
        cat "$tmp_dir"/node_output
    fi
    while read -r node; do
        unschedulable=$(echo "$node" | grep true | cut -f 1 -d ' ')
        if [ -n "$unschedulable" ] && [ -z ${was_unschedulable["$unschedulable"]+x} ] ; then
            echo "$unschedulable is now unschedulable!"
            was_unschedulable["$unschedulable"]=1
        fi
        schedulable=$(echo "$node" | grep '<none>' | cut -f 1 -d ' ')
        if [ -n "$schedulable" ] && [ ${was_unschedulable["$schedulable"]+x} ] && [ -z ${has_recovered["$schedulable"]+x} ]; then
            echo "$schedulable has recovered!"
            has_recovered["$schedulable"]=1
        fi
    done < "$tmp_dir"/node_output

    if [[ "${#has_recovered[@]}" == "$NODECOUNT" ]]; then
        echo "All nodes recovered."
        break
    else
        if (( attempt_num == max_attempts ))
        then
            echo "Attempt $attempt_num failed and there are no more attempts left!"
            exit 1
        else
            echo "Attempt $attempt_num failed! Trying again in $sleep_time seconds..."
            sleep "$sleep_time"
        fi
    fi
    (( attempt_num++ ))
done

echo "Test successful"
19 tests/kind/test-metrics.sh Executable file
@@ -0,0 +1,19 @@
#!/usr/bin/env bash

expected="$1"
if [[ "$expected" != "0" && "$expected" != "1" ]]; then
    echo "You should give an argument to this script, the gauge value (0 or 1)"
    exit 1
fi

HOST="${HOST:-localhost}"
PORT="${PORT:-30000}"
NODENAME="${NODENAME-chart-testing-control-plane}"

reboot_required=$(docker exec "$NODENAME" curl "http://$HOST:$PORT/metrics" | awk '/^kured_reboot_required/{print $2}')
if [[ "$reboot_required" == "$expected" ]]; then
    echo "Test success"
else
    echo "Test failed"
    exit 1
fi