# Destination of the zipped Lambda deployment package.
LAMBDA_ZIP=/tmp/lambda.zip
# Write the Lambda source that follows to /tmp/lambda.py. The heredoc
# delimiter is quoted so the shell copies the Python text verbatim instead of
# applying parameter, command and arithmetic expansion to it.
cat << 'EOF' > /tmp/lambda.py
from urllib.error import HTTPError
import boto3
import jmespath
import json
import os
import urllib3
from base64 import b64decode
from urllib.parse import unquote
# Silence the InsecureRequestWarning that urllib3 would otherwise emit for
# HTTPS requests made without certificate verification.
urllib3.disable_warnings(category=urllib3.exceptions.InsecureRequestWarning)
class MissingEnvironmentVariable(Exception):
    """Raised when a required environment variable is not set."""
class MissingSiteUrl(Exception):
    """Raised when no endpoint URL is configured for a site."""
def env(name):
    """Return the value of the environment variable called name.

    Raises:
        MissingEnvironmentVariable: if name is not present in os.environ.
    """
    value = os.environ.get(name)
    if value is None:
        raise MissingEnvironmentVariable(f"Environment Variable '{name}' must be set")
    return value
def handle_site_offline(labels):
    """Remove an offline site from the Global Accelerator and take its backup offline.

    Args:
        labels: Labels of the SiteOffline alert. The keys 'accelerator',
            'reporter' and 'site' are read.

    The alert is ignored (a message is printed and nothing is changed) when
    the accelerator cannot be found, when the endpoint group holds fewer than
    two endpoints, when no endpoint is tagged with the reporter site, or when
    the reporter's own endpoint is UNHEALTHY.
    """
    # Members of an EndpointDescription that update_endpoint_group also accepts
    # inside an EndpointConfiguration. Read-only members such as HealthState
    # must not be sent back.
    configuration_keys = ('EndpointId', 'Weight', 'ClientIPPreservationEnabled', 'AttachmentArn')

    a_client = boto3.client('globalaccelerator', region_name='us-west-2')

    accelerator_dns = labels['accelerator']
    accelerator = jmespath.search(
        f"Accelerators[?(DnsName=='{accelerator_dns}'|| DualStackDnsName=='{accelerator_dns}')]",
        a_client.list_accelerators(),
    )
    if not accelerator:
        print(f"Ignoring SiteOffline alert as accelerator with DnsName '{accelerator_dns}' not found")
        return

    accelerator_arn = accelerator[0]['AcceleratorArn']
    listener_arn = a_client.list_listeners(AcceleratorArn=accelerator_arn)['Listeners'][0]['ListenerArn']
    endpoint_group = a_client.list_endpoint_groups(ListenerArn=listener_arn)['EndpointGroups'][0]
    endpoints = endpoint_group['EndpointDescriptions']

    # Only update accelerator endpoints if two entries exist
    if len(endpoints) <= 1:
        print("Ignoring SiteOffline alert only one Endpoint defined in the EndpointGroup")
        return

    reporter = labels['reporter']
    reporter_endpoint = next(
        (e for e in endpoints if endpoint_belongs_to_site(e, reporter)),
        None,
    )
    if reporter_endpoint is None:
        print(f"Ignoring SiteOffline alert as no endpoint found for reporter '{reporter}'")
        return

    # If the reporter endpoint is not healthy then do nothing for now
    # A Lambda will eventually be triggered by the other offline site for this reporter
    if reporter_endpoint['HealthState'] == 'UNHEALTHY':
        print(f"Ignoring SiteOffline alert as reporter '{reporter}' endpoint is marked UNHEALTHY")
        return

    offline_site = labels['site']
    remaining = [
        {key: e[key] for key in configuration_keys if key in e}
        for e in endpoints
        if not endpoint_belongs_to_site(e, offline_site)
    ]
    a_client.update_endpoint_group(
        EndpointGroupArn=endpoint_group['EndpointGroupArn'],
        EndpointConfigurations=remaining,
    )
    print(f"Removed site={offline_site} from Accelerator EndpointGroup")

    take_infinispan_site_offline(reporter, offline_site)
    print(f"Backup site={offline_site} caches taken offline")
def endpoint_belongs_to_site(endpoint, site):
    """Return True when the load balancer behind endpoint is tagged with site.

    Args:
        endpoint: Endpoint description whose 'EndpointId' is used as the
            load balancer ARN.
        site: Expected value of the load balancer's 'site' tag.

    Returns:
        True if the load balancer has a 'site' tag equal to site, otherwise
        False (including when it has no 'site' tag at all).
    """
    lb_arn = endpoint['EndpointId']
    # The region is the fourth colon-separated field of the ARN
    region = lb_arn.split(':')[3]
    client = boto3.client('elbv2', region_name=region)
    tags = client.describe_tags(ResourceArns=[lb_arn])['TagDescriptions'][0]['Tags']
    for tag in tags:
        if tag['Key'] == 'site':
            return tag['Value'] == site
    return False
def take_infinispan_site_offline(reporter, offlinesite):
    """Ask the reporter site's Infinispan server to take a backup site offline.

    Args:
        reporter: Site whose REST endpoint is called. It must be a key of the
            json object stored in INFINISPAN_SITE_ENDPOINTS.
        offlinesite: Name of the backup site to take offline.

    Raises:
        MissingSiteUrl: if INFINISPAN_SITE_ENDPOINTS has no entry for reporter.

    A response status of 400 or above is reported with a printed message and
    is not propagated to the caller.
    """
    endpoints = json.loads(INFINISPAN_SITE_ENDPOINTS)
    if reporter not in endpoints:
        raise MissingSiteUrl(f"Missing URL for site '{reporter}' in 'INFINISPAN_SITE_ENDPOINTS' json")

    endpoint = endpoints[reporter]
    password = get_secret(INFINISPAN_USER_SECRET)
    url = f"https://{endpoint}/rest/v2/container/x-site/backups/{offlinesite}?action=take-offline"
    # NOTE(review): certificate verification is disabled for this request
    http = urllib3.PoolManager(cert_reqs='CERT_NONE')
    headers = urllib3.make_headers(basic_auth=f"{INFINISPAN_USER}:{password}")

    rsp = http.request("POST", url, headers=headers)
    try:
        if rsp.status >= 400:
            # Log the failure directly. urllib.error.HTTPError needs
            # (url, code, msg, hdrs, fp), so raising it with a message and a
            # status alone fails with TypeError before it can be reported.
            print(f"HTTP error encountered: Unexpected response status '{rsp.status}' when taking site offline")
    finally:
        # Release the connection on the error path as well
        rsp.release_conn()
def get_secret(secret_name):
    """Return the SecretString stored in Secrets Manager under secret_name.

    The client is created for the region named by SECRETS_REGION.
    """
    secrets_client = boto3.session.Session().client(
        service_name='secretsmanager',
        region_name=SECRETS_REGION,
    )
    response = secrets_client.get_secret_value(SecretId=secret_name)
    return response['SecretString']
class DecodeError(ValueError):
    """Raised when an Authorization header is not a valid Basic credential."""


def decode_basic_auth_header(encoded_str):
    """Extract the username and password from a Basic Authorization header.

    Args:
        encoded_str: Header value made of the scheme 'Basic', one space and
            the base64 encoding of 'username:password'.

    Returns:
        A (username, password) tuple with percent-escapes decoded.

    Raises:
        DecodeError: if the value does not have exactly two space-separated
            parts, the scheme is not 'basic' (case-insensitive), or the
            credentials are not base64 text containing a colon.
    """
    parts = encoded_str.strip().split(' ')
    if len(parts) != 2 or parts[0].strip().lower() != 'basic':
        raise DecodeError("Authorization header is not a Basic credential")

    try:
        # Split on the first colon only so the password may contain colons
        username, password = b64decode(parts[1]).decode().split(':', 1)
    except ValueError as err:
        # Covers invalid base64, non UTF-8 bytes and a missing colon
        raise DecodeError("Basic credentials could not be decoded") from err

    return unquote(username), unquote(password)
def handler(event, context):
    """Lambda entry point: authenticate an alert webhook call and act on SiteOffline alerts.

    Args:
        event: Invocation event. Its 'headers' mapping is searched for an
            'authorization' entry and its 'body' is parsed as json.
        context: Lambda context object (unused).

    Returns:
        A dict with a 'statusCode' of 401 when the Authorization header is
        missing or cannot be decoded, 403 when the credentials do not match,
        and 204 otherwise.

    Raises:
        Exception: if the event has no 'body'.
    """
    import hmac

    headers = event.get('headers') or {}
    authorization = headers.get('authorization')

    # Log the event without the caller's credentials
    loggable_event = dict(event)
    if authorization is not None:
        loggable_event['headers'] = {**headers, 'authorization': '***'}
    print(json.dumps(loggable_event))

    if authorization is None:
        print("'Authorization' header missing from request")
        return {
            "statusCode": 401
        }

    try:
        username, password = decode_basic_auth_header(authorization)
    except Exception as err:
        # A header that cannot be parsed is treated as unauthenticated
        print(f"Unable to decode 'Authorization' header: {err}")
        return {
            "statusCode": 401
        }

    expected_pass = get_secret(WEBHOOK_USER_SECRET)
    # Both the username and the password must match. Constant-time
    # comparison avoids leaking how much of a guess was correct.
    user_ok = hmac.compare_digest(username.encode(), WEBHOOK_USER.encode())
    pass_ok = hmac.compare_digest(password.encode(), expected_pass.encode())
    if not (user_ok and pass_ok):
        print('Invalid username/password combination')
        return {
            "statusCode": 403
        }

    body = event.get('body')
    if body is None:
        raise Exception('Empty request body')

    body = json.loads(body)
    print(json.dumps(body))

    status = body['status']
    if status != 'firing':
        print(f"Ignoring alert as status is not 'firing', status was: '{status}'")
        return {
            "statusCode": 204
        }

    for alert in body['alerts']:
        labels = alert['labels']
        if labels['alertname'] == 'SiteOffline':
            handle_site_offline(labels)

    return {
        "statusCode": 204
    }
# Required configuration, read once when the module is loaded. env() raises
# MissingEnvironmentVariable for the first of these that is not set.
# User name sent in the basic auth header of the Infinispan REST request
INFINISPAN_USER = env('INFINISPAN_USER')
# Secret name passed to get_secret() to obtain the Infinispan password
INFINISPAN_USER_SECRET = env('INFINISPAN_USER_SECRET')
# json object mapping a site name to the host used in that site's REST URL
INFINISPAN_SITE_ENDPOINTS = env('INFINISPAN_SITE_ENDPOINTS')
# Region of the Secrets Manager client created by get_secret()
SECRETS_REGION = env('SECRETS_REGION')
# User name expected in the webhook's Authorization header
WEBHOOK_USER = env('WEBHOOK_USER')
# Secret name passed to get_secret() to obtain the expected webhook password
WEBHOOK_USER_SECRET = env('WEBHOOK_USER_SECRET')
EOF
# Package the Lambda source. -FS syncs an existing archive with the file on
# disk and --junk-paths stores the file without its directory. The variable is
# quoted so the archive path is passed as a single word.
zip -FS --junk-paths "${LAMBDA_ZIP}" /tmp/lambda.py