mirror of
https://github.com/krkn-chaos/krkn.git
synced 2026-02-14 18:10:00 +00:00
Add support for user-provided default network ACL (#731)
* Add support for user-provided default network ACL Signed-off-by: henrick <self@thehenrick.com> * Add logs to notify user when their provided acl is used Signed-off-by: henrick <self@thehenrick.com> * Update docs to include optional default_acl_id parameter in zone_outage Signed-off-by: henrick <self@thehenrick.com> --------- Signed-off-by: henrick <self@thehenrick.com> Co-authored-by: henrick <self@thehenrick.com>
This commit is contained in:
@@ -13,10 +13,12 @@ zone_outage: # Scenario to create an out
|
||||
duration: 600 # Duration in seconds after which the zone will be back online.
|
||||
vpc_id: # Cluster virtual private network to target.
|
||||
subnet_id: [subnet1, subnet2] # List of subnet-id's to deny both ingress and egress traffic.
|
||||
default_acl_id: acl-xxxxxxxx # (Optional) ID of an existing network ACL to use instead of creating a new one. If provided, this ACL will not be deleted after the scenario.
|
||||
```
|
||||
|
||||
**NOTE**: vpc_id and subnet_id can be obtained from the cloud web console by selecting one of the instances in the targeted zone ( us-west-2a for example ).
|
||||
**NOTE**: Multiple zones will experience downtime in case of targeting multiple subnets which might have an impact on the cluster health especially if the zones have control plane components deployed.
|
||||
**NOTE**: default_acl_id can be obtained from the AWS VPC Console by selecting "Network ACLs" from the left sidebar ( the ID will be in the format 'acl-xxxxxxxx' ). Make sure the selected ACL has the desired ingress/egress rules for your outage scenario ( i.e., deny all ).
|
||||
|
||||
##### Debugging steps in case of failures
|
||||
In case of failures during the steps which revert back the network acl to allow traffic and bring back the cluster nodes in the zone, the nodes in the particular zone will be in `NotReady` condition. Here is how to fix it:
|
||||
|
||||
@@ -29,6 +29,8 @@ class ZoneOutageScenarioPlugin(AbstractScenarioPlugin):
|
||||
subnet_ids = scenario_config["subnet_id"]
|
||||
duration = scenario_config["duration"]
|
||||
cloud_type = scenario_config["cloud_type"]
|
||||
# Add support for user-provided default network ACL
|
||||
default_acl_id = scenario_config.get("default_acl_id")
|
||||
ids = {}
|
||||
acl_ids_created = []
|
||||
|
||||
@@ -58,7 +60,20 @@ class ZoneOutageScenarioPlugin(AbstractScenarioPlugin):
|
||||
"Network association ids associated with "
|
||||
"the subnet %s: %s" % (subnet_id, network_association_ids)
|
||||
)
|
||||
acl_id = cloud_object.create_default_network_acl(vpc_id)
|
||||
|
||||
# Use provided default ACL if available, otherwise create a new one
|
||||
if default_acl_id:
|
||||
acl_id = default_acl_id
|
||||
logging.info(
|
||||
"Using provided default ACL ID %s - this ACL will not be deleted after the scenario",
|
||||
default_acl_id
|
||||
)
|
||||
# Don't add to acl_ids_created since we don't want to delete user-provided ACLs at cleanup
|
||||
else:
|
||||
acl_id = cloud_object.create_default_network_acl(vpc_id)
|
||||
logging.info("Created new default ACL %s", acl_id)
|
||||
acl_ids_created.append(acl_id)
|
||||
|
||||
new_association_id = cloud_object.replace_network_acl_association(
|
||||
network_association_ids[0], acl_id
|
||||
)
|
||||
@@ -66,7 +81,6 @@ class ZoneOutageScenarioPlugin(AbstractScenarioPlugin):
|
||||
# capture the orginal_acl_id, created_acl_id and
|
||||
# new association_id to use during the recovery
|
||||
ids[new_association_id] = original_acl_id
|
||||
acl_ids_created.append(acl_id)
|
||||
|
||||
# wait for the specified duration
|
||||
logging.info(
|
||||
|
||||
@@ -3,3 +3,4 @@ zone_outage: # Scenario to create an out
|
||||
duration: 600 # duration in seconds after which the zone will be back online
|
||||
vpc_id: # cluster virtual private network to target
|
||||
subnet_id: [subnet1, subnet2] # List of subnet-id's to deny both ingress and egress traffic
|
||||
default_acl_id: acl-xxxxxxxx # (Optional) ID of an existing network ACL to use instead of creating a new one. If provided, this ACL will not be deleted after the scenario.
|
||||
|
||||
Reference in New Issue
Block a user