Skip to content
Open
Changes from 3 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
ba1b46a
add basic setup for the ansible module
AmitPhulera Jun 1, 2026
a432ff3
get region and init ec2 client
AmitPhulera Jun 1, 2026
1cc0e41
have a lightweight context class to bundle ansible and boto clients, …
AmitPhulera Jun 1, 2026
2be5cf9
add basic structure for all the commands that module will support
AmitPhulera Jun 2, 2026
8c3e9b1
allow test ansible module to capture warnings as well
AmitPhulera Jun 2, 2026
6e7ea8f
add basic tests for module's basic calling checks
AmitPhulera Jun 2, 2026
29991c2
have an Instance class to represent the instance state
AmitPhulera Jun 2, 2026
1002e41
add FakeEC2Instance class to be used in the tests
AmitPhulera Jun 2, 2026
d2f78e7
describe instances
AmitPhulera Jun 2, 2026
5b26467
start instances
AmitPhulera Jun 2, 2026
ea6ed5f
implement stop
AmitPhulera Jun 2, 2026
036229a
implement stop_and_start
AmitPhulera Jun 2, 2026
974a90e
lint
AmitPhulera Jun 3, 2026
813a7d5
don't update library path to call ansible module, it is being taken c…
AmitPhulera Jun 5, 2026
6d3d240
remove redundant list casting
AmitPhulera Jun 5, 2026
69749cb
fail with module.fail_json if client is not available
AmitPhulera Jun 5, 2026
88b69ed
rename property name to be same as variable name
AmitPhulera Jun 5, 2026
10e8703
remove docstrings that are less helpful
AmitPhulera Jun 5, 2026
46189a2
nits from review
AmitPhulera Jun 5, 2026
f65f251
fix tests
AmitPhulera Jun 5, 2026
02e4d10
refactor: from review
AmitPhulera Jun 5, 2026
9e12be6
use f strings everywhere in tests
AmitPhulera Jun 5, 2026
616ad06
move module functions to InstanceManager to remove _Ctx class
AmitPhulera Jun 5, 2026
111910c
fix failing test - this failed because the start_and_stop was relying…
AmitPhulera Jun 5, 2026
64231cc
Merge remote-tracking branch 'origin/master' into ap/restart-ans-mod
AmitPhulera Jun 8, 2026
cf25fe2
remove return statements that were added to safeguard that tests retu…
AmitPhulera Jun 8, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
186 changes: 186 additions & 0 deletions src/commcare_cloud/ansible/library/ec2_instance_state.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
#! /usr/bin/env python3
"""Custom Ansible module to start/stop/stop_and_start/describe EC2 instances."""
import os
import re
from enum import Enum

from ansible.module_utils.basic import AnsibleModule

DOCUMENTATION = """
---
module: ec2_instance_state

short_description: Start, stop, stop_and_start, or describe EC2 instances by ID.

description:
- Manages the running state of EC2 instances given an explicit list of
instance IDs. Supports four commands - describe, start, stop, stop_and_start,
and is idempotent (no API call is made if the instance is already in the requested state).
- Designed to run with delegate_to localhost. AWS credentials and the target region are picked up from the standard boto3 credential chain; in the commcare-cloud workflow the AWS_PROFILE and AWS_REGION environment variables are exported automatically before ansible runs.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm just not too familiar with this. Do you mind elaborating on the "delete_to localhost" workflow?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is delegate_to localhost. It makes Ansible run this task locally, on the machine the command was launched from, instead of on the target machine.
In this case the AWS credentials are on our local system, so the module has to run there.


version_added: "1.0.0"

options:
instance_ids:
description: List of EC2 instance IDs to act on.
required: true
type: list
elements: str
command:
description: Command to execute.
required: true
type: str
choices: [describe, start, stop, stop_and_start]
region:
description: >
AWS region. Falls back to the AWS_REGION environment variable
when omitted. Module fails if neither is set.
required: false
type: str
wait:
description: >
Block until the final target state (running/stopped) is reached.
Ignored for describe. Transition preconditions are always waited
for regardless of this setting: a 'stopping' instance is awaited to
'stopped' before it is started, and a 'pending' instance is awaited
to 'running' before it is stopped. For stop_and_start the stop
phase always waits; this setting governs only the final start phase.
required: false
default: true
type: bool

author:
- Amit Phulera
"""

# YAML task snippets showing typical invocations of this module. Module
# arguments are nested under the module name; delegate_to is a task keyword.
EXAMPLES = """
- name: Stop and start a single host (region picked up from AWS_REGION env var)
  ec2_instance_state:
    instance_ids:
      - "{{ hostvars['10.201.11.133'].ec2_instance_id }}"
    command: stop_and_start
  delegate_to: localhost

- name: Stop all webworkers in batch
  ec2_instance_state:
    instance_ids: >-
      {{ groups['webworkers']
         | map('extract', hostvars, 'ec2_instance_id')
         | list }}
    command: stop
  delegate_to: localhost

- name: Describe instances in a non-default region
  ec2_instance_state:
    instance_ids: ["i-0123456789abcdef0"]
    command: describe
    region: us-west-2
  delegate_to: localhost
"""

# YAML description of the values this module returns. Each return value is a
# top-level key whose description/type/elements are nested beneath it.
RETURN = """
changed:
  description: True if this run mutated AWS state.
  type: bool
command:
  description: The requested command, echoed back.
  type: str
instances:
  description: One entry per requested instance, in input order.
  type: list
  elements: dict
unchanged_instance_ids:
  description: IDs that needed no action because they were already in the target state.
  type: list
  elements: str
diff:
  description: Per-instance state map before/after this run.
  type: dict
"""


class InstanceCommand(str, Enum):
    """Commands accepted by the module's ``command`` option.

    Members are also ``str`` instances, so they compare equal to the raw
    string values supplied in a playbook.
    """

    DESCRIBE = 'describe'
    START = 'start'
    STOP = 'stop'
    STOP_AND_START = 'stop_and_start'

# Matches "i-" followed by exactly 8 or exactly 17 lowercase hex digits.
# The end anchor is \Z rather than $ because $ also matches just before a
# trailing newline, which would let "i-1234abcd\n" through validation.
INSTANCE_ID_RE = re.compile(r'^i-([0-9a-f]{8}|[0-9a-f]{17})\Z')

class InstanceState(str, Enum):
    """EC2 instance lifecycle states as returned by DescribeInstances (State.Name).

    Members are also ``str`` instances, so they compare equal to the raw
    state names.
    """

    PENDING = 'pending'
    RUNNING = 'running'
    STOPPING = 'stopping'
    STOPPED = 'stopped'
    SHUTTING_DOWN = 'shutting-down'
    TERMINATED = 'terminated'

# States grouped as "terminated" by this module: already terminated, or
# shutting down. Frozen so the shared constant cannot be mutated by accident.
TERMINATED_STATES = frozenset({InstanceState.TERMINATED, InstanceState.SHUTTING_DOWN})


def _get_region(module):
    """Return the region from params, falling back to the AWS_REGION env var.

    Surrounding whitespace is stripped and a blank value is treated as
    "not set", so a whitespace-only parameter or environment variable is
    never handed on as a region name.

    :param module: object exposing ``params`` (a mapping) and
        ``fail_json(msg=...)``.
    :returns: the resolved region name. ``fail_json`` is called when no
        region is available; if that call returns, ``None`` is returned.
    """
    def _clean(value):
        # None and '' both collapse to '' so the ``or`` chain below falls through.
        return (value or '').strip()

    region = _clean(module.params.get('region')) or _clean(os.environ.get('AWS_REGION'))
    if not region:
        module.fail_json(msg=(
            "AWS region not provided. Pass 'region' to the module, "
            "or set the AWS_REGION environment variable."
        ))
    return region or None


def _get_ec2_client(region):
"""Return a boto3 EC2 client. Defined as a module-level function so tests can patch it."""
try:
import boto3
except ImportError:
raise RuntimeError(
"boto3 is required by ec2_instance_state but is not installed."
)

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not call module.fail_json directly here, similar to what _get_region does? That way main doesn't need to wrap the call to _get_ec2_client in a try/except.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's a good point. Will update it.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

return boto3.client('ec2', region_name=region)

class _Ctx:

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not my favorite name, but need to continue reviewing to offer suggestions.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

All of the functions that accept this type (like _do_start(ctx, ...)) look like instance methods. What do you think of renaming this to something like StopStarter and moving those functions to methods?

class StopStarter:
    """Sketch of a class that holds the client and the module as attributes.

    The command helpers would become methods here; their bodies are
    intentionally elided (``...``) in this outline.
    """

    def __init__(self, client, module):
        self.client = client
        self.module = module

    def describe(self, instance_ids):
        ...

    def start(self, instance_ids, wait):
        ...

    def stop(self, instance_ids, wait):
        ...

    ...

Feel free to pick a different name, that was just the first thing that came to mind.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks @millerdev. I used the class name EC2InstanceManager in 616ad06.
This looks much better.

"""Per-run context shared by the flow helpers.

Bundles the EC2 client, the AnsibleModule, so these don't have to be
passed as arguments to every helper.
"""

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
"""Per-run context shared by the flow helpers.
Bundles the EC2 client, the AnsibleModule, so these don't have to be
passed as arguments to every helper.
"""
"""
Bundles the EC2 client and Ansible module for convenience
"""

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.


def __init__(self, client, module):
self.client = client
self.module = module


def main():
module_args = {
'instance_ids': {'type': 'list', 'elements': 'str', 'required': True},
'command': {'type': 'str', 'required': True, 'choices': [c.value for c in InstanceCommand]},
'region': {'type': 'str', 'required': False, 'default': None},
'wait': {'type': 'bool', 'required': False, 'default': True},
}
module = AnsibleModule(argument_spec=module_args, supports_check_mode=True)
params = module.params

instance_ids = params['instance_ids']
if not instance_ids:
module.fail_json(msg="'instance_ids' must be a non-empty list.")

bad = [i for i in instance_ids if not INSTANCE_ID_RE.match(i)]
if bad:

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit

Suggested change
bad = [i for i in instance_ids if not INSTANCE_ID_RE.match(i)]
if bad:
bad_ids = [i for i in instance_ids if not INSTANCE_ID_RE.match(i)]
if bad_ids:

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

module.fail_json(msg=f"Malformed instance IDs: {bad!r}")

region = _get_region(module)

try:
client = _get_ec2_client(region)
except RuntimeError as e:
module.fail_json(msg=str(e))

ctx = _Ctx(client, module)

module.exit_json()


# Run the module only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()