diff --git a/CHANGELOG.md b/CHANGELOG.md index e9530e9..662091c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,98 @@ +## 0.3.7 + +ENHANCEMENTS: + +* Add elasticloadbalancing:AddTags IAM permission to deploy_access_role to comply with upcoming AWS security changes. + +## 0.3.6 + +BUG FIXES: + +* Actually retry ASG creation. + +## 0.3.5 + +BUG FIXES: + +* Fix exception when retrying ASG creation. + +## 0.3.4 + +ENHANCEMENTS: + +* Now supports AutoScaling Group metrics. + +## 0.3.3 + +ENHANCEMENTS: + +* Retry creating AutoScaling Group. + +## 0.3.2 + +ENHANCEMENTS: + +* Permit longer names for automatic undeployment rules to minimize conflicts. + +## 0.3.1 + +BUG FIXES: + +* Cancelling a deploy of a web service during its Bake period now correctly rolls back to the previous production deploy. + +## 0.3.0 + +ENHANCEMENTS: + +* Deploy, undeploy, and cancel now support services which have multiple load balancer rules directing traffic to them. + +## 0.2.22 + +ENHANCEMENTS: + +* Target group attributes (deregistration delay, stickiness, load balancing algorithm) are now supported. + +## 0.2.21 + +BUG FIXES: + +* Syntax error. + +## 0.2.20 + +BUG FIXES: + +* Limit length of target group name, but better. + +## 0.2.19 + +BUG FIXES: + +* Limit length of ASG/target group names to avoid naming conflicts. + +## 0.2.18 + +BUG FIXES: + +* Limit length of automatic undeploy rule name, but better. + +## 0.2.17 + +BUG FIXES: + +* Limit length of automatic undeploy rule name. + +## 0.2.16 + +BUG FIXES: + +* Fixed language for starting undeployment slack notification. 
+* Support external id when assuming roles ([#2](https://github.com/GoCarrot/terraform-aws-deployomat/pull/2)) + +SPECIAL THANKS: + +* [@MrJoy](https://github.com/MrJoy) + ## 0.2.15 ENHANCEMENTS: diff --git a/modules/deploy_access_role/main.tf b/modules/deploy_access_role/main.tf index 011aa13..afe86b6 100644 --- a/modules/deploy_access_role/main.tf +++ b/modules/deploy_access_role/main.tf @@ -39,7 +39,45 @@ data "aws_default_tags" "tags" {} data "aws_iam_policy_document" "allow-deployomat-assume" { statement { actions = [ - "sts:AssumeRole", + "sts:AssumeRole" + ] + + principals { + type = "AWS" + identifiers = formatlist("arn:${data.aws_partition.current.partition}:iam::%s:root", var.ci_cd_account_ids) + } + + condition { + test = "StringEquals" + variable = "aws:ResourceTag/Environment" + values = ["&{aws:PrincipalTag/Environment}"] + } + + condition { + test = "StringEquals" + variable = "aws:PrincipalTag/Service" + values = [var.deployomat_service_name] + } + + condition { + test = "ForAllValues:StringEquals" + variable = "aws:TagKeys" + values = ["ServiceName", "ServiceLogName"] + } + + dynamic "condition" { + for_each = var.external_id != null ? 
[1] : [] + + content { + test = "StringEquals" + variable = "sts:ExternalId" + values = [var.external_id] + } + } + } + + statement { + actions = [ "sts:TagSession" ] @@ -104,7 +142,8 @@ data "aws_iam_policy_document" "allow-deploy" { "autoscaling:AttachLoadBalancerTargetGroups", "autoscaling:PutScalingPolicy", "autoscaling:PutWarmPool", - "autoscaling:UpdateAutoScalingGroup" + "autoscaling:UpdateAutoScalingGroup", + "autoscaling:EnableMetricsCollection" ] resources = [ @@ -168,7 +207,8 @@ data "aws_iam_policy_document" "allow-deploy" { statement { actions = [ "elasticloadbalancing:CreateRule", - "elasticloadbalancing:CreateTargetGroup" + "elasticloadbalancing:CreateTargetGroup", + "elasticloadbalancing:AddTags" ] resources = [ diff --git a/modules/deployomat/main.tf b/modules/deployomat/main.tf index 273dd63..cd36e4e 100644 --- a/modules/deployomat/main.tf +++ b/modules/deployomat/main.tf @@ -83,7 +83,7 @@ resource "aws_iam_policy" "deployomat-lambda-logging" { } locals { - automatic_undeploy_rule_arn = "arn:${data.aws_partition.current.partition}:events:*:${data.aws_caller_identity.current.account_id}:rule/*-automatic-undeploy" + automatic_undeploy_rule_arn = "arn:${data.aws_partition.current.partition}:events:*:${data.aws_caller_identity.current.account_id}:rule/*-undeploy" } data "aws_iam_policy_document" "deployomat-lambda" { diff --git a/modules/deployomat/src/cancel.rb b/modules/deployomat/src/cancel.rb index c0aa516..0eb6a5c 100644 --- a/modules/deployomat/src/cancel.rb +++ b/modules/deployomat/src/cancel.rb @@ -72,17 +72,17 @@ def call def reset_listeners(listeners, production_asg, deploy_asg) elbv2 = ElbV2.new(@config) - target_group = production_asg&.target_group_arns&.first || deploy_asg&.target_group_arns&.first + target_groups = [production_asg&.target_group_arns&.first, deploy_asg&.target_group_arns&.first].compact - production_rules = listeners.map do |(key, listener)| + production_rules = listeners.flat_map do |(key, listener)| if !listener 
listener = key key = nil end - puts "Identifying deploy rule for #{key} listener #{listener}" - elbv2.find_rule_with_target_in_listener( - listener, target_group + puts "Identifying deploy rules for #{key} listener #{listener}" + elbv2.find_rules_with_targets_in_listener( + listener, target_groups ) end.compact @@ -95,7 +95,11 @@ def reset_listeners(listeners, production_asg, deploy_asg) if production_asg puts "Coalescing on production asg #{production_asg.auto_scaling_group_name}" production_rules.each do |rule| - elbv2.coalesce(rule, production_asg.target_group_arns.first) + if !elbv2.coalesce(rule, production_asg.target_group_arns.first) + puts "Could not coalesce rule #{rule.rule_arn}. Destroying instead." + elbv2.delete_rule(rule.rule_arn) + puts "Destroyed #{rule.rule_arn}" + end end puts "Coalesced." return :wait diff --git a/modules/deployomat/src/deploy.rb b/modules/deployomat/src/deploy.rb index 4ae0187..bfcb63c 100644 --- a/modules/deployomat/src/deploy.rb +++ b/modules/deployomat/src/deploy.rb @@ -17,6 +17,10 @@ require_relative 'lib' module Deployomat + ACTIVE_DEPLOY_MSG = "Asserting active deploy" + NOT_ACTIVE_MSG = "No longer active deploy." 
+ ACTIVE_ASSERT_MSG = "Asserted active" + class StartDeploy extend Forwardable @@ -28,7 +32,7 @@ class StartDeploy def_delegators :@config, :account_name, :service_name, :prefix, :deploy_id, :params - attr_reader :ami_id, :new_asg_name, :bake_time, :health_timeout, + attr_reader :ami_id, :new_asg_name, :bake_time, :health_timeout, :new_tg_name, :traffic_shift_per_step, :wait_per_step, :allow_undeploy, :automatic_undeploy_minutes GREATER_THAN_ZERO = %i[bake_time traffic_shift_per_step wait_per_step health_timeout].freeze @@ -38,6 +42,7 @@ def initialize(config, ami_id:, deploy_config:) @config = config @ami_id = ami_id @new_asg_name = "#{service_name}-#{Time.now.utc.strftime("%Y%m%dT%H%M%SZ")}" + @new_tg_name = "#{service_name[0...15]}-#{Time.now.utc.strftime("%Y%m%dT%H%M%SZ")}" # TODO: These should be configurable. @bake_time = deploy_config.fetch('BakeTime', DEFAULT_BAKE_TIME) @@ -128,7 +133,7 @@ def call new_target_group_arn = nil if exemplar_tg_arn puts "Cloning target group..." - new_target_group = elbv2.clone_target_group(exemplar_tg_arn, new_asg_name) + new_target_group = elbv2.clone_target_group(exemplar_tg_arn, new_tg_name) new_target_group_arn = new_target_group.target_group_arn puts "Cloned target group #{new_target_group_arn}" end @@ -150,14 +155,14 @@ def call if production_asg - puts "Asserting active deploy" + puts ACTIVE_DEPLOY_MSG begin @config.assert_active rescue Aws::DynamoDB::Errors::ConditionalCheckFailedException - error "No longer active deploy." 
+ error NOT_ACTIVE_MSG return error_response end - puts "Asserted active" + puts ACTIVE_ASSERT_MSG puts "Preventing scale-in of #{production_asg.auto_scaling_group_name}" asg.prevent_scale_in(production_asg) end @@ -167,14 +172,14 @@ def call if exemplar_tg_arn production_rules = [] listeners = params.get_list_or_json("#{prefix}/config/#{service_name}/listener_arns") - listeners.each do |(key, listener)| + production_rules = listeners.flat_map do |(key, listener)| if !listener listener = key key = nil end - puts "Preparing deploy rule for #{key} listener #{listener}..." - production_rules << elbv2.prepare_deploy_rule( + puts "Preparing deploy rules for #{key} listener #{listener}..." + elbv2.prepare_deploy_rules( listener, production_tg_arn, exemplar_tg_arn, new_target_group_arn ) end @@ -264,14 +269,14 @@ def initialize(config, max_wait:, min_healthy:, target_group_arn:, remaining_tim def call elbv2 = ElbV2.new(@config) - puts "Asserting active deploy" + puts ACTIVE_DEPLOY_MSG begin @config.assert_active rescue Aws::DynamoDB::Errors::ConditionalCheckFailedException - puts "No longer active deploy." + puts NOT_ACTIVE_MSG return { Status: :deploy_aborted } end - puts "Asserted active" + puts ACTIVE_ASSERT_MSG healthy_count = elbv2.count_healthy(target_group_arn) puts "Waiting for #{min_healthy} healthy instances, have #{healthy_count}" @@ -310,14 +315,14 @@ def call total = progress + step_size production_rules.each do |rule| - puts "Asserting active deploy" + puts ACTIVE_DEPLOY_MSG begin @config.assert_active rescue Aws::DynamoDB::Errors::ConditionalCheckFailedException - puts "No longer active deploy." 
+ puts NOT_ACTIVE_MSG return { Status: :deploy_aborted } end - puts "Asserted active" + puts ACTIVE_ASSERT_MSG elbv2.shift_traffic(rule, step_size, old_target_group_arn, target_group_arn) puts "Shifted traffic on #{rule.rule_arn} to #{total}%" end @@ -348,14 +353,14 @@ def call production_rules = elbv2.describe_rules(rule_ids) production_rules.each do |rule| - puts "Asserting active deploy" + puts ACTIVE_DEPLOY_MSG begin @config.assert_active rescue Aws::DynamoDB::Errors::ConditionalCheckFailedException - puts "No longer active deploy." + puts NOT_ACTIVE_MSG return { Status: :deploy_aborted } end - puts "Asserted active" + puts ACTIVE_ASSERT_MSG elbv2.coalesce(rule, target_group_arn) puts "Coalesced traffic on #{rule.rule_arn}" end @@ -403,14 +408,14 @@ def initialize(config, allow_undeploy:, automatic_undeploy_minutes:) def call asg = Asg.new(@config) - puts "Asserting active deploy" + puts ACTIVE_DEPLOY_MSG begin @config.assert_active rescue Aws::DynamoDB::Errors::ConditionalCheckFailedException - puts "No longer active deploy." 
+ puts NOT_ACTIVE_MSG return { Status: :deploy_aborted } end - puts "Asserted active" + puts ACTIVE_ASSERT_MSG deploy_asg = asg.get(@config.deploy_asg) puts "Allowing scale-in of new ASG #{deploy_asg.auto_scaling_group_name}" diff --git a/modules/deployomat/src/lambda_handlers.rb b/modules/deployomat/src/lambda_handlers.rb index f4a2dfa..4eab5c1 100644 --- a/modules/deployomat/src/lambda_handlers.rb +++ b/modules/deployomat/src/lambda_handlers.rb @@ -73,6 +73,8 @@ def self.deploy(event:, context:) Deployomat::FinishDeploy.new( config, allow_undeploy: event['AllowUndeploy'], automatic_undeploy_minutes: event['AutomaticUndeployMinutes'] ) + else + return { Status: :fail, Error: ["Unexpected step: '#{event['Step']}'"] } end op.call end diff --git a/modules/deployomat/src/lib.rb b/modules/deployomat/src/lib.rb index 216aa05..6e7b172 100644 --- a/modules/deployomat/src/lib.rb +++ b/modules/deployomat/src/lib.rb @@ -54,7 +54,8 @@ def self.role_credentials(role_arn, deploy_id) our_role_creds[deploy_id] ||= begin Aws::AssumeRoleCredentials.new( role_arn: role_arn, - role_session_name: deploy_id[0...64] + role_session_name: deploy_id[0...64], + external_id: ENV['ROLE_EXTERNAL_ID'] ) end end @@ -90,6 +91,27 @@ class Config UNDEPLOYING = 'undeploying' ALLOW = 'allow' + ID_VAR = '#DEPLOY_ID' + ASG_VAR = '#DEPLOY_ASG' + UNDEPLOY_VAR = '#UNDEPLOY_STATE' + PROD_ASG_VAR = '#PROD_ASG' + + ID_KEY = ':deploy_id' + ASG_KEY = ':deploy_asg' + NEW_ID_KEY = ':new_deploy_id' + OLD_ID_KEY = ':old_deploy_id' + EMPTY_KEY = ':empty' + UNDEPLOY_KEY = ':undeploying' + ALLOW_KEY = ':allow' + PROD_ASG_KEY = ':prod_asg' + UNDEPLOY_STATE_KEY = ':undeploy_state' + + ID_NAME = 'deploy_id' + ASG_NAME = 'deploy_asg_name' + UNDEPLOY_STATE_NAME = 'undeploy_state' + DEPLOY_NAME = 'deploy_asg' + PROD_ASG_NAME = 'production_asg_name' + attr_reader :account_canonical_slug, :account_name, :service_name, :prefix, :deploy_id, :params, :organization_prefix, :account_environment, :primary_key @@ -128,117 +150,134 
@@ def reload end def production_asg - @config&.fetch('production_asg_name', nil) + @config&.fetch(PROD_ASG_NAME, nil) end def deploy_asg - @config&.fetch('deploy_asg_name', nil) + @config&.fetch(ASG_NAME, nil) end def undeploying? - @config&.fetch('undeploy_state', '') == UNDEPLOYING + @config&.fetch(UNDEPLOY_STATE_NAME, '') == UNDEPLOYING end def undeployable? - @config&.fetch('undeploy_state', '') == ALLOW + @config&.fetch(UNDEPLOY_STATE_NAME, '') == ALLOW end + ASSERT_START_CANCEL_UPDATE_EXPR = "SET #{ID_VAR} = #{NEW_ID_KEY}" + ASSERT_START_CANCEL_CONDITION_EXPR = "#{ID_VAR} = #{OLD_ID_KEY} AND attribute_exists(#{ASG_VAR}) AND #{ASG_VAR} <> #{EMPTY_KEY}" + def assert_start_cancel @config = @client.update_item( table_name: ENV['DEPLOYOMAT_TABLE'], return_values: 'ALL_NEW', key: { 'id' => @primary_key }, - update_expression: 'SET #DEPLOY_ID = :new_deploy_id', - condition_expression: '#DEPLOY_ID = :old_deploy_id AND attribute_exists(#DEPLOY_ASG) AND #DEPLOY_ASG <> :empty', + update_expression: ASSERT_START_CANCEL_UPDATE_EXPR, + condition_expression: ASSERT_START_CANCEL_CONDITION_EXPR, expression_attribute_names: { - '#DEPLOY_ID' => 'deploy_id', - '#DEPLOY_ASG' => 'deploy_asg_name' + ID_VAR => ID_NAME, + ASG_VAR => ASG_NAME }, expression_attribute_values: { - ':new_deploy_id' => @deploy_id, - ':old_deploy_id' => @config&.fetch('deploy_id', nil), - ':empty' => '' + NEW_ID_KEY => @deploy_id, + OLD_ID_KEY => @config&.fetch(ID_NAME, nil), + EMPTY_KEY => '' } ).attributes end + ASSERT_START_DEPLOY_UPDATE_EXPR = "SET #{ID_VAR} = #{NEW_ID_KEY}, #{ASG_VAR} = #{ASG_KEY}" + ASSERT_START_DEPLOY_CONDITION_EXPR = "(attribute_not_exists(#{ID_VAR}) OR #{ID_VAR} = #{OLD_ID_KEY}) AND (attribute_not_exists(#{ASG_VAR}) OR #{ASG_VAR} = #{EMPTY_KEY}) AND (attribute_not_exists(#{UNDEPLOY_VAR}) OR #{UNDEPLOY_VAR} <> #{UNDEPLOY_KEY})" + def assert_start_deploy(name) @config = @client.update_item( table_name: ENV['DEPLOYOMAT_TABLE'], return_values: 'ALL_NEW', key: { 'id' => @primary_key }, 
- update_expression: 'SET #DEPLOY_ID = :new_deploy_id, #DEPLOY_ASG = :deploy_asg', - condition_expression: '(attribute_not_exists(#DEPLOY_ID) OR #DEPLOY_ID = :old_deploy_id) AND (attribute_not_exists(#DEPLOY_ASG) OR #DEPLOY_ASG = :empty) AND (attribute_not_exists(#UNDEPLOY_STATE) OR #UNDEPLOY_STATE <> :undeploying)', + update_expression: ASSERT_START_DEPLOY_UPDATE_EXPR, + condition_expression: ASSERT_START_DEPLOY_CONDITION_EXPR, expression_attribute_names: { - '#DEPLOY_ID' => 'deploy_id', - '#DEPLOY_ASG' => 'deploy_asg_name', - '#UNDEPLOY_STATE' => 'undeploy_state' + ID_VAR => ID_NAME, + ASG_VAR => ASG_NAME, + UNDEPLOY_VAR => UNDEPLOY_STATE_NAME }, expression_attribute_values: { - ':new_deploy_id' => @deploy_id, - ':old_deploy_id' => @config&.fetch('deploy_id', nil), - ':empty' => '', - ':deploy_asg' => name, - ':undeploying' => UNDEPLOYING + NEW_ID_KEY => @deploy_id, + OLD_ID_KEY => @config&.fetch(ID_NAME, nil), + EMPTY_KEY => '', + ASG_KEY => name, + UNDEPLOY_KEY => UNDEPLOYING } ).attributes end + ASSERT_START_UNDEPLOY_UPDATE_EXPR = "SET #{ID_VAR} = #{NEW_ID_KEY}, #{UNDEPLOY_VAR} = #{UNDEPLOY_KEY}" + ASSERT_START_UNDEPLOY_CONDITION_EXPR = "(attribute_not_exists(#{ID_VAR}) OR #{ID_VAR} = #{OLD_ID_KEY}) AND (attribute_not_exists(#{ASG_VAR}) OR #{ASG_VAR} = #{EMPTY_KEY}) AND (#{UNDEPLOY_VAR} = #{ALLOW_KEY} OR #{UNDEPLOY_VAR} = #{UNDEPLOY_KEY})" + def assert_start_undeploy @config = @client.update_item( table_name: ENV['DEPLOYOMAT_TABLE'], return_values: 'ALL_NEW', key: { 'id' => @primary_key }, - update_expression: 'SET #DEPLOY_ID = :new_deploy_id, #UNDEPLOY_STATE = :undeploying', - condition_expression: '(attribute_not_exists(#DEPLOY_ID) OR #DEPLOY_ID = :old_deploy_id) AND (attribute_not_exists(#DEPLOY_ASG) OR #DEPLOY_ASG = :empty) AND (#UNDEPLOY_STATE = :allow OR #UNDEPLOY_STATE = :undeploying)', + update_expression: ASSERT_START_UNDEPLOY_UPDATE_EXPR, + condition_expression: ASSERT_START_UNDEPLOY_CONDITION_EXPR, expression_attribute_names: { - '#DEPLOY_ID' => 
'deploy_id', - '#DEPLOY_ASG' => 'deploy_asg', - '#UNDEPLOY_STATE' => 'undeploy_state' + ID_VAR => ID_NAME, + ASG_VAR => DEPLOY_NAME, + UNDEPLOY_VAR => UNDEPLOY_STATE_NAME }, expression_attribute_values: { - ':new_deploy_id' => @deploy_id, - ':old_deploy_id' => @config&.fetch('deploy_id', nil), - ':empty' => '', - ':undeploying' => UNDEPLOYING, - ':allow' => ALLOW + NEW_ID_KEY => @deploy_id, + OLD_ID_KEY => @config&.fetch(ID_NAME, nil), + EMPTY_KEY => '', + UNDEPLOY_KEY => UNDEPLOYING, + ALLOW_KEY => ALLOW } ).attributes end + COMPLETE_UNDEPLOY_CONDITION_EXPR = "(attribute_not_exists(#{ID_VAR}) OR #{ID_VAR} = #{ID_KEY}) AND (attribute_not_exists(#{UNDEPLOY_VAR}) OR #{UNDEPLOY_VAR} = #{UNDEPLOY_KEY})" + def complete_undeploy @config = @client.delete_item( table_name: ENV['DEPLOYOMAT_TABLE'], return_values: 'ALL_OLD', key: { 'id' => @primary_key }, - condition_expression: '(attribute_not_exists(#DEPLOY_ID) OR #DEPLOY_ID = :deploy_id) AND (attribute_not_exists(#UNDEPLOY_STATE) OR #UNDEPLOY_STATE = :undeploying)', + condition_expression: COMPLETE_UNDEPLOY_CONDITION_EXPR, expression_attribute_names: { - '#DEPLOY_ID' => 'deploy_id', - '#UNDEPLOY_STATE' => 'undeploy_state' + ID_VAR => ID_NAME, + UNDEPLOY_VAR => UNDEPLOY_STATE_NAME }, expression_attribute_values: { - ':deploy_id' => @deploy_id, - ':undeploying' => UNDEPLOYING + ID_KEY => @deploy_id, + UNDEPLOY_KEY => UNDEPLOYING } ) end + ASSERT_ACTIVE_UPDATE_EXPR = "SET #{ID_VAR} = #{ID_KEY}" + ASSERT_ACTIVE_CONDITION_EXPR = "#{ID_VAR} = #{ID_KEY}" + def assert_active @config = @client.update_item( table_name: ENV['DEPLOYOMAT_TABLE'], return_values: 'ALL_NEW', key: { 'id' => @primary_key }, - update_expression: 'SET #DEPLOY_ID = :deploy_id', - condition_expression: '#DEPLOY_ID = :deploy_id', + update_expression: ASSERT_ACTIVE_UPDATE_EXPR, + condition_expression: ASSERT_ACTIVE_CONDITION_EXPR, expression_attribute_names: { - '#DEPLOY_ID' => 'deploy_id' + ID_VAR => ID_NAME }, expression_attribute_values: { - ':deploy_id' => 
@deploy_id + ID_KEY => @deploy_id } ).attributes end + SET_PROD_ASG_UPDATE_EXPR = "SET #{PROD_ASG_VAR} = #{PROD_ASG_KEY}, #{ASG_VAR} = #{EMPTY_KEY}, #{UNDEPLOY_VAR} = #{UNDEPLOY_STATE_KEY}" + SET_PROD_ASG_CONDITION_EXPR = "#{ID_VAR} = #{ID_KEY}" + def set_production_asg(name, allow_undeploy: nil) allow_undeploy = undeploying? || undeployable? if allow_undeploy.nil? @@ -246,19 +285,19 @@ def set_production_asg(name, allow_undeploy: nil) table_name: ENV['DEPLOYOMAT_TABLE'], return_values: 'ALL_NEW', key: { 'id' => @primary_key }, - update_expression: 'SET #PROD_ASG = :prod_asg, #DEPLOY_ASG = :empty, #UNDEPLOY_STATE = :undeploy_state', - condition_expression: '#DEPLOY_ID = :deploy_id', + update_expression: SET_PROD_ASG_UPDATE_EXPR, + condition_expression: SET_PROD_ASG_CONDITION_EXPR, expression_attribute_names: { - '#DEPLOY_ID' => 'deploy_id', - '#PROD_ASG' => 'production_asg_name', - '#DEPLOY_ASG' => 'deploy_asg_name', - '#UNDEPLOY_STATE' => 'undeploy_state' + ID_VAR => ID_NAME, + PROD_ASG_VAR => PROD_ASG_NAME, + ASG_VAR => ASG_NAME, + UNDEPLOY_VAR => UNDEPLOY_STATE_NAME }, expression_attribute_values: { - ':deploy_id' => @deploy_id, - ':prod_asg' => name, - ':empty' => '', - ':undeploy_state' => allow_undeploy ? ALLOW : '' + ID_KEY => @deploy_id, + PROD_ASG_KEY => name, + EMPTY_KEY => '', + UNDEPLOY_STATE_KEY => allow_undeploy ? 
ALLOW : '' } ).attributes end @@ -396,7 +435,7 @@ def disable_automatic_undeploy private def rule_name - @rule_name ||= "#{@config.primary_key}-automatic-undeploy" + @rule_name ||= "#{@config.primary_key[0...54]}-undeploy" end end @@ -513,7 +552,32 @@ def clone_asg(template_asg, launch_template, name, min_size, target_group_arn) new_asg_parameters[:placement_group] = template_asg.placement_group end - @client.create_auto_scaling_group(new_asg_parameters) + retry_count = 0 + begin + @client.create_auto_scaling_group(new_asg_parameters) + rescue Aws::AutoScaling::Errors::AlreadyExistsFault + # If we got here and never retried, we are in trouble + # If we got here because a retry succeeded, we're okay. + raise if retry_count == 0 + # The most common reason we get here is an eventual consistency issue with target groups. + # We see from time to time that AutoScaling isn't able to identify a target group that was + # recently created. + rescue Aws::AutoScaling::Errors::ServiceError => exc + puts "Error cloning ASG to #{name}. 
#{exc.class.name} #{exc.message}" + retry_count += 1 + raise if retry_count >= 3 + puts "Retrying #{retry_count}" + sleep 2 ** retry_count + retry + end + + if template_asg.enabled_metrics && template_asg.enabled_metrics.length > 0 + @client.enable_metrics_collection( + auto_scaling_group_name: name, + granularity: template_asg.enabled_metrics[0].granularity, + metrics: template_asg.enabled_metrics.map(&:metric) + ) + end if template_asg.warm_pool_configuration @client.put_warm_pool( @@ -552,6 +616,7 @@ def initialize(config) def clone_target_group(target_group_arn, clone_name) new_tg_conf = @client.describe_target_groups(target_group_arns: [target_group_arn]).target_groups&.first&.to_h + new_tg_attributes = @client.describe_target_group_attributes(target_group_arn: target_group_arn).attributes REMOVE_TG_PARAMS.each { |param| new_tg_conf.delete(param) } tags = @client.describe_tags(resource_arns: [target_group_arn]).tag_descriptions&.first&.tags @@ -563,92 +628,103 @@ def clone_target_group(target_group_arn, clone_name) end new_tg_conf[:tags] = tags - @client.create_target_group( + new_tg = @client.create_target_group( new_tg_conf.merge(name: clone_name.gsub(/[^A-Za-z0-9\-]/, '-')[0...32]) ).target_groups.first + @client.modify_target_group_attributes(target_group_arn: new_tg.target_group_arn, attributes: new_tg_attributes) + return new_tg end - def find_rule_with_target_in_listener(listener_arn, target_group) + def find_rules_with_targets_in_listener(listener_arn, target_groups) rules = @client.describe_rules(listener_arn: listener_arn).rules - production_rule = exemplar_rule = nil - # TODO: Support multiple rules per target group. - rules.find do |rule| - rule.actions.any? { |action| action.forward_config&.target_groups&.any? { |tg_conf| tg_conf.target_group_arn == target_group } } + rules.select do |rule| + rule.actions.any? { |action| action.forward_config&.target_groups&.any? 
{ |tg_conf| target_groups.include?(tg_conf.target_group_arn) } } end end - def prepare_deploy_rule(listener_arn, production_tg_arn, exemplar_tg_arn, deploy_tg_arn) + def prepare_deploy_rules(listener_arn, production_tg_arn, exemplar_tg_arn, deploy_tg_arn) rules = @client.describe_rules(listener_arn: listener_arn).rules - production_rule = exemplar_rule = nil + production_rules = {} + exemplar_rules = {} # TODO: Support multiple rules per target group. rules.each do |rule| if rule.actions.any? { |action| action.forward_config&.target_groups&.any? { |tg_conf| tg_conf.target_group_arn == exemplar_tg_arn } } - exemplar_rule = rule + exemplar_rules[rule.priority.to_i - PRIORITY_OFFSET] = rule elsif rule.actions.any? { |action| action.forward_config && action.forward_config.target_groups.any? { |tg_conf| tg_conf.target_group_arn == production_tg_arn } } - production_rule = rule + production_rules[rule.priority.to_i] = rule end end - if !production_rule && exemplar_rule - tags = @client.describe_tags(resource_arns: [exemplar_rule.rule_arn]).tag_descriptions&.first&.tags + exemplar_rules.map do |(priority, exemplar_rule)| + production_rule = production_rules[priority] - managed = tags.find { |tag| tag[:key] == 'Managed' } - if managed - managed[:value] = MANAGED_TAG - else - tags.push({ key: 'Managed', value: MANAGED_TAG }) - end - # Assert exemplar priority >= 40k - new_rule = exemplar_rule.to_h - new_rule[:priority] = (new_rule[:priority].to_i - PRIORITY_OFFSET).to_s - new_rule[:actions].each do |action| - if action[:target_group_arn] == exemplar_tg_arn - action[:target_group_arn] = deploy_tg_arn + if !production_rule + tags = @client.describe_tags(resource_arns: [exemplar_rule.rule_arn]).tag_descriptions&.first&.tags + + managed = tags.find { |tag| tag[:key] == 'Managed' } + if managed + managed[:value] = MANAGED_TAG + else + tags.push({ key: 'Managed', value: MANAGED_TAG }) end + # Assert exemplar priority >= 40k + new_rule = exemplar_rule.to_h + new_rule[:priority] = 
(new_rule[:priority].to_i - PRIORITY_OFFSET).to_s + new_rule[:actions].each do |action| + if action[:target_group_arn] == exemplar_tg_arn + action[:target_group_arn] = deploy_tg_arn + end - if action[:forward_config] - action[:forward_config][:target_groups].each do |group| - group[:target_group_arn] = deploy_tg_arn if group[:target_group_arn] == exemplar_tg_arn + if action[:forward_config] + action[:forward_config][:target_groups].each do |group| + group[:target_group_arn] = deploy_tg_arn if group[:target_group_arn] == exemplar_tg_arn + end end end - end - new_rule[:conditions].each do |condition| - condition.delete(:values) - end - new_rule[:listener_arn] = listener_arn - new_rule[:tags] = tags - REMOVE_RULE_PARAMS.each { |param| new_rule.delete(param) } + new_rule[:conditions].each do |condition| + condition.delete(:values) + end + new_rule[:listener_arn] = listener_arn + new_rule[:tags] = tags + REMOVE_RULE_PARAMS.each { |param| new_rule.delete(param) } - # TODO: This can fail if another deployomat creates a rule before us. We should - # clean up all resources and terminate in that case. - [:initial, @client.create_rule(new_rule).rules.first] - else - new_rule = production_rule.to_h - action = new_rule[:actions].find { |action| action.dig(:forward_config, :target_groups)&.any? { |tg_conf| tg_conf[:target_group_arn] == production_tg_arn } } - action.delete(:target_group_arn) + # TODO: This can fail if another deployomat creates a rule before us. We should + # clean up all resources and terminate in that case. + [:initial, @client.create_rule(new_rule).rules.first] + else + new_rule = production_rule.to_h + action = new_rule[:actions].find { |action| action.dig(:forward_config, :target_groups)&.any? { |tg_conf| tg_conf[:target_group_arn] == production_tg_arn } } + action.delete(:target_group_arn) - # TODO: Assert that the production rule only contains one forward to the known production tg. 
+ # TODO: Assert that the production rule only contains one forward to the known production tg. - action[:forward_config][:target_groups] = [ - { - target_group_arn: production_tg_arn, - weight: 100 - }, - { - target_group_arn: deploy_tg_arn, - weight: 0 - } - ] + action[:forward_config][:target_groups] = [ + { + target_group_arn: production_tg_arn, + weight: 100 + }, + { + target_group_arn: deploy_tg_arn, + weight: 0 + } + ] - [:update, modify_rule(new_rule)] + [:update, modify_rule(new_rule)] + end end end def shift_traffic(rule, amount, production_tg_arn, deploy_tg_arn) new_rule = rule.to_h - forwards = new_rule[:actions].find { |action| action.dig(:forward_config, :target_groups)&.any? { |tg_conf| tg_conf[:target_group_arn] == production_tg_arn } }.dig(:forward_config, :target_groups) + forwards = new_rule[:actions].find { |action| action.dig(:forward_config, :target_groups)&.any? { |tg_conf| tg_conf[:target_group_arn] == production_tg_arn } }&.dig(:forward_config, :target_groups) + + if forwards.nil? || forwards.length < 2 + puts "Rule #{new_rule[:rule_arn]} not configured for traffic shift, skipping." + return + end + production_forward = forwards.find { |tg_conf| tg_conf[:target_group_arn] == production_tg_arn } deploy_forward = forwards.find { |tg_conf| tg_conf[:target_group_arn] == deploy_tg_arn } @@ -662,10 +738,27 @@ def shift_traffic(rule, amount, production_tg_arn, deploy_tg_arn) def coalesce(rule, production_tg_arn) new_rule = rule.to_h - forwards = new_rule[:actions].find { |action| action.dig(:forward_config, :target_groups)&.any? { |tg_conf| tg_conf[:target_group_arn] == production_tg_arn } }.dig(:forward_config) - forwards[:target_groups] = [{ target_group_arn: production_tg_arn, weight: 100 }] + forward = new_rule[:actions].find { |action| action[:type] == 'forward' } + + if forward.nil? + puts "Rule #{new_rule[:rule_arn]} not configured for traffic shift, skipping." 
+ return false + end + + new_config = [{ target_group_arn: production_tg_arn, weight: 100 }] + + forward.delete(:target_group_arn) + # If we already have a forward config, we only want to update the target groups on it, retaining + # the target_group_stickiness_config. If we have no forward config then we need to set the + # full thing. + if forward.dig(:forward_config, :target_groups) + forward[:forward_config][:target_groups] = new_config + else + forward[:forward_config] = { target_groups: new_config } + end modify_rule(new_rule) + true end def destroy_tg(target_group_arn) diff --git a/modules/deployomat/src/undeploy.rb b/modules/deployomat/src/undeploy.rb index 1955a58..ef6c84c 100644 --- a/modules/deployomat/src/undeploy.rb +++ b/modules/deployomat/src/undeploy.rb @@ -81,8 +81,8 @@ def call key = nil end - rule = elbv2.find_rule_with_target_in_listener(listener_arn, tg_arn) - if rule + rules = elbv2.find_rules_with_targets_in_listener(listener_arn, [tg_arn]) + rules.each do |rule| puts "Destroying rule in #{key} #{listener_arn} : #{rule.rule_arn}" elbv2.delete_rule(rule.rule_arn) end diff --git a/modules/deployomat/variables.tf b/modules/deployomat/variables.tf index 9f88041..3861482 100644 --- a/modules/deployomat/variables.tf +++ b/modules/deployomat/variables.tf @@ -56,3 +56,9 @@ variable "ami_owner_account_ids" { EOT default = null } + +variable "external_id" { + type = string + description = "The ExternalId to use when assuming roles, if necessary." 
+ default = null +} diff --git a/modules/meta_access_role/main.tf b/modules/meta_access_role/main.tf index d65b0d4..3ed043c 100644 --- a/modules/meta_access_role/main.tf +++ b/modules/meta_access_role/main.tf @@ -39,7 +39,45 @@ data "aws_default_tags" "tags" {} data "aws_iam_policy_document" "allow-deployomat-assume" { statement { actions = [ - "sts:AssumeRole", + "sts:AssumeRole" + ] + + principals { + type = "AWS" + identifiers = formatlist("arn:${data.aws_partition.current.partition}:iam::%s:root", var.ci_cd_account_ids) + } + + condition { + test = "StringEquals" + variable = "aws:RequestTag/Environment" + values = ["&{aws:PrincipalTag/Environment}"] + } + + condition { + test = "ForAnyValue:StringLike" + variable = "sts:TransitiveTagKeys" + values = ["Environment"] + } + + condition { + test = "StringEquals" + variable = "aws:PrincipalTag/Service" + values = [var.deployomat_service_name] + } + + dynamic "condition" { + for_each = var.external_id != null ? [1] : [] + + content { + test = "StringEquals" + variable = "sts:ExternalId" + values = [var.external_id] + } + } + } + + statement { + actions = [ "sts:TagSession" ] diff --git a/modules/meta_access_role/variables.tf b/modules/meta_access_role/variables.tf index 7430a4e..a30c65a 100644 --- a/modules/meta_access_role/variables.tf +++ b/modules/meta_access_role/variables.tf @@ -34,3 +34,9 @@ variable "tags" { description = "Tags to apply to all resources. Will be merged with Service=var.deployomat_service_name and deduplicated from default tags." default = {} } + +variable "external_id" { + type = string + description = "The ExternalId to use when assuming roles, if necessary." 
+ default = null +} diff --git a/modules/slack_notifier/src/slack_notify.rb b/modules/slack_notifier/src/slack_notify.rb index 61a21a6..de70588 100644 --- a/modules/slack_notifier/src/slack_notify.rb +++ b/modules/slack_notifier/src/slack_notify.rb @@ -101,7 +101,7 @@ def self.notification_for_undeploy(event) status = detail['status'] deployment_desc = "undeployment of #{input['ServiceName']} from #{input['AccountCanonicalSlug']} (Execution )" if status == 'RUNNING' - { text: "Started undeploy of #{deployment_desc}" } + { text: "Started #{deployment_desc}" } elsif status == 'UPDATE' update = detail['updates'] return { text: "Update from #{deployment_desc}\n\n#{update.join("\n")}"}