Skip to content

Commit d63c631

Browse files
committed
runners: Add terraform module for scale-cycle
Adds terraform files required for deploying scale-cycle. This also adds a switch to turn on scale-cycle or not with the default position being off. This should make it easier to deploy scale-cycle in the future and could also act as a gate for easily enabling / disabling scale-up's tryReuseRunner functionality. Signed-off-by: Eli Uriegas <[email protected]> ghstack-source-id: 87583f4 ghstack-comment-id: 3046573503 Pull-Request: #6893 Signed-off-by: Eli Uriegas <[email protected]>
1 parent 9449370 commit d63c631

File tree

5 files changed

+260
-0
lines changed

5 files changed

+260
-0
lines changed

terraform-aws-github-runner/main.tf

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,10 @@ module "runners" {
115115

116116
retry_scale_up_chron_hud_query_url = var.retry_scale_up_chron_hud_query_url
117117

118+
enable_scale_cycle = var.enable_scale_cycle
119+
scale_cycle_schedule_expression = var.scale_cycle_schedule_expression
120+
lambda_timeout_scale_cycle = var.lambda_timeout_scale_cycle
121+
118122
must_have_issues_labels = var.must_have_issues_labels
119123
cant_have_issues_labels = var.cant_have_issues_labels
120124

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
{
2+
"Version": "2012-10-17",
3+
"Statement": [
4+
{
5+
"Effect": "Allow",
6+
"Action": [
7+
"ec2:DescribeInstances",
8+
"ec2:DescribeTags",
9+
"ec2:RunInstances",
10+
"ec2:CreateNetworkInterface",
11+
"ec2:DescribeNetworkInterfaces",
12+
"ec2:DeleteNetworkInterface",
13+
"ec2:DescribeImages",
14+
"ec2:CreateTags",
15+
"ec2:DeleteTags",
16+
"ec2:CreateReplaceRootVolumeTask",
17+
"ec2:DescribeReplaceRootVolumeTasks"
18+
],
19+
"Resource": ["*"]
20+
},
21+
{
22+
"Effect": "Allow",
23+
"Action": [
24+
"ec2:CreateTags"
25+
],
26+
"Resource": ["*"],
27+
"Condition": {
28+
"StringEquals": {
29+
"ec2:CreateAction" : "RunInstances"
30+
}
31+
}
32+
},
33+
{
34+
"Effect": "Allow",
35+
"Action": "iam:PassRole",
36+
"Resource": "${arn_runner_instance_role}"
37+
},
38+
{
39+
"Effect": "Allow",
40+
"Action": ["ssm:PutParameter", "ssm:GetParameter", "ssm:DeleteParameter"],
41+
"Resource": "*"
42+
}
43+
]
44+
}
Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,176 @@
1+
# KMS grant allowing the scale-cycle lambda role to decrypt with the configured key.
# Created only when encryption is enabled AND scale-cycle is enabled.
resource "aws_kms_grant" "scale_cycle" {
2+
count = var.encryption.encrypt && var.enable_scale_cycle ? 1 : 0
3+
name = "${var.environment}-scale-cycle"
4+
key_id = var.encryption.kms_key_id
5+
grantee_principal = aws_iam_role.scale_cycle[0].arn
6+
operations = ["Decrypt"]
7+
8+
# The grant only applies when the encryption context carries this Environment value.
constraints {
9+
encryption_context_equals = {
10+
Environment = var.environment
11+
}
12+
}
13+
}
14+
15+
# The scale-cycle lambda (handler index.scaleCycle); created only when var.enable_scale_cycle is true.
resource "aws_lambda_function" "scale_cycle" {
16+
count = var.enable_scale_cycle ? 1 : 0
17+
# The S3 settings are passed straight through; a null value leaves the argument unset.
s3_bucket = var.lambda_s3_bucket
18+
s3_key = var.runners_lambda_s3_key
19+
s3_object_version = var.runners_lambda_s3_object_version
20+
# When no S3 bucket is configured, deploy from the local zip and track its hash instead.
filename = var.lambda_s3_bucket == null ? local.lambda_zip : null
21+
source_code_hash = var.lambda_s3_bucket == null ? filebase64sha256(local.lambda_zip) : null
22+
function_name = "${var.environment}-scale-cycle"
23+
role = aws_iam_role.scale_cycle[0].arn
24+
handler = "index.scaleCycle"
25+
runtime = "nodejs20.x"
26+
timeout = var.lambda_timeout_scale_cycle
27+
tags = local.tags
28+
memory_size = 2048
29+
30+
environment {
31+
variables = {
32+
DATETIME_DEPLOY = local.datetime_deploy
33+
ENABLE_ORGANIZATION_RUNNERS = var.enable_organization_runners
34+
ENVIRONMENT = var.environment
35+
GITHUB_APP_CLIENT_ID = var.github_app.client_id
36+
GITHUB_APP_CLIENT_SECRET = var.github_app_client_secret
37+
GITHUB_APP_ID = var.github_app.id
38+
GITHUB_APP_KEY_BASE64 = var.github_app_key_base64
39+
KMS_KEY_ID = var.encryption.kms_key_id
40+
LAMBDA_TIMEOUT = var.lambda_timeout_scale_cycle
41+
LAUNCH_TEMPLATE_NAME_LINUX = var.launch_template_name_linux
42+
LAUNCH_TEMPLATE_NAME_LINUX_ARM64 = var.launch_template_name_linux_arm64
43+
LAUNCH_TEMPLATE_NAME_LINUX_NVIDIA = var.launch_template_name_linux_nvidia
44+
LAUNCH_TEMPLATE_NAME_WINDOWS = var.launch_template_name_windows
45+
LAUNCH_TEMPLATE_VERSION_LINUX = var.launch_template_version_linux
46+
LAUNCH_TEMPLATE_VERSION_LINUX_ARM64 = var.launch_template_version_linux_arm64
47+
LAUNCH_TEMPLATE_VERSION_LINUX_NVIDIA = var.launch_template_version_linux_nvidia
48+
LAUNCH_TEMPLATE_VERSION_WINDOWS = var.launch_template_version_windows
49+
MINIMUM_RUNNING_TIME_IN_MINUTES = var.minimum_running_time_in_minutes
50+
REDIS_ENDPOINT = var.redis_endpoint
51+
REDIS_LOGIN = var.redis_login
52+
RUNNER_EXTRA_LABELS = var.runner_extra_labels
53+
SCALE_CONFIG_ORG = var.scale_config_org
54+
SCALE_CONFIG_REPO = var.scale_config_repo
55+
SCALE_CONFIG_REPO_PATH = var.scale_config_repo_path
56+
SECRETSMANAGER_SECRETS_ID = var.secretsmanager_secrets_id
57+
58+
# Comma-separated, sorted, de-duplicated "region|vpc" pairs.
AWS_REGIONS_TO_VPC_IDS = join(
59+
",",
60+
sort(distinct([
61+
for region_vpc in var.vpc_ids :
62+
format("%s|%s", region_vpc.region, region_vpc.vpc)
63+
]))
64+
)
65+
# Comma-separated "vpc|security-group" pairs: one per VPC (runner security group
# looked up by the VPC's index in local.vpc_id_to_idx) plus every entry of var.vpc_sgs.
VPC_ID_TO_SECURITY_GROUP_IDS = join(
66+
",",
67+
sort(distinct(concat(
68+
[
69+
for vpc in var.vpc_ids :
70+
format(
71+
"%s|%s",
72+
vpc.vpc,
73+
var.runners_security_group_ids[local.vpc_id_to_idx[vpc.vpc]]
74+
)
75+
],
76+
[
77+
for vpc_subnet in var.vpc_sgs :
78+
format("%s|%s", vpc_subnet.vpc, vpc_subnet.sg)
79+
]
80+
)))
81+
)
82+
# Comma-separated, sorted, de-duplicated "vpc|subnet" pairs.
VPC_ID_TO_SUBNET_IDS = join(
83+
",",
84+
sort(distinct([
85+
for vpc_subnet in var.subnet_vpc_ids :
86+
format("%s|%s", vpc_subnet.vpc, vpc_subnet.subnet)
87+
]))
88+
)
89+
# Comma-separated, sorted, de-duplicated "subnet|az" pairs.
SUBNET_ID_TO_AZ = join(
90+
",",
91+
sort(distinct([
92+
for subnet_az in var.subnet_azs :
93+
format("%s|%s", subnet_az.subnet, subnet_az.az)
94+
]))
95+
)
96+
}
97+
}
98+
99+
# The lambda runs in the lambda subnets with the lambda security groups plus the first runner security group.
vpc_config {
100+
security_group_ids = concat(
101+
var.lambda_security_group_ids,
102+
[var.runners_security_group_ids[0]]
103+
)
104+
subnet_ids = var.lambda_subnet_ids
105+
}
106+
}
107+
108+
# CloudWatch log group for the scale-cycle lambda. The name is derived from the
# function's name, so it implicitly depends on aws_lambda_function.scale_cycle.
resource "aws_cloudwatch_log_group" "scale_cycle" {
109+
count = var.enable_scale_cycle ? 1 : 0
110+
name = "/aws/lambda/${aws_lambda_function.scale_cycle[0].function_name}"
111+
retention_in_days = var.logging_retention_in_days
112+
tags = var.tags
113+
}
114+
115+
# Scheduled event rule that fires on var.scale_cycle_schedule_expression.
resource "aws_cloudwatch_event_rule" "scale_cycle" {
116+
count = var.enable_scale_cycle ? 1 : 0
117+
name = "${var.environment}-scale-cycle-rule"
118+
schedule_expression = var.scale_cycle_schedule_expression
119+
tags = var.tags
120+
}
121+
122+
# Points the scheduled rule at the scale-cycle lambda.
resource "aws_cloudwatch_event_target" "scale_cycle" {
123+
count = var.enable_scale_cycle ? 1 : 0
124+
rule = aws_cloudwatch_event_rule.scale_cycle[0].name
125+
arn = aws_lambda_function.scale_cycle[0].arn
126+
}
127+
128+
# Lets events.amazonaws.com invoke the scale-cycle lambda, restricted to the
# scale-cycle schedule rule via source_arn.
resource "aws_lambda_permission" "scale_cycle" {
129+
count = var.enable_scale_cycle ? 1 : 0
130+
statement_id = "AllowExecutionFromCloudWatch"
131+
action = "lambda:InvokeFunction"
132+
function_name = aws_lambda_function.scale_cycle[0].function_name
133+
principal = "events.amazonaws.com"
134+
source_arn = aws_cloudwatch_event_rule.scale_cycle[0].arn
135+
}
136+
137+
# Execution role for the scale-cycle lambda. The assume-role policy comes from the
# lambda_assume_role_policy data source, which is declared outside this file.
resource "aws_iam_role" "scale_cycle" {
138+
count = var.enable_scale_cycle ? 1 : 0
139+
name = "${var.environment}-action-scale-cycle-lambda-role"
140+
assume_role_policy = data.aws_iam_policy_document.lambda_assume_role_policy.json
141+
path = local.role_path
142+
permissions_boundary = var.role_permissions_boundary
143+
tags = local.tags
144+
}
145+
146+
# Inline policy for the scale-cycle role, rendered from policies/lambda-scale-cycle.json
# with the runner instance role ARN supplied as arn_runner_instance_role.
resource "aws_iam_role_policy" "scale_cycle" {
147+
count = var.enable_scale_cycle ? 1 : 0
148+
name = "${var.environment}-lambda-scale-cycle-policy"
149+
role = aws_iam_role.scale_cycle[0].name
150+
policy = templatefile("${path.module}/policies/lambda-scale-cycle.json", {
151+
arn_runner_instance_role = var.role_runner_arn
152+
})
153+
}
154+
155+
# Inline logging policy for the scale-cycle role, rendered from policies/lambda-cloudwatch.json
# with the ARN of the scale-cycle log group supplied as log_group_arn.
resource "aws_iam_role_policy" "scale_cycle_logging" {
156+
count = var.enable_scale_cycle ? 1 : 0
157+
name = "${var.environment}-lambda-logging"
158+
role = aws_iam_role.scale_cycle[0].name
159+
policy = templatefile("${path.module}/policies/lambda-cloudwatch.json", {
160+
log_group_arn = aws_cloudwatch_log_group.scale_cycle[0].arn
161+
})
162+
}
163+
164+
# Attaches the AWS-managed VPC access execution policy to the scale-cycle role.
# Only needed when lambda subnets are configured and scale-cycle is enabled.
resource "aws_iam_role_policy_attachment" "scale_cycle_vpc_execution_role" {
165+
count = length(var.lambda_subnet_ids) > 0 && var.enable_scale_cycle ? 1 : 0
166+
role = aws_iam_role.scale_cycle[0].name
167+
policy_arn = "arn:aws:iam::aws:policy/service-role/AWSLambdaVPCAccessExecutionRole"
168+
}
169+
170+
# Gives the scale-cycle role access to the app credentials secret.
# Created only when a Secrets Manager secret id is configured and scale-cycle is enabled.
resource "aws_iam_role_policy" "scale_cycle_secretsmanager_access" {
171+
count = var.secretsmanager_secrets_id != null && var.enable_scale_cycle ? 1 : 0
172+
role = aws_iam_role.scale_cycle[0].name
173+
policy = templatefile("${path.module}/policies/lambda-secretsmanager.json", {
174+
secretsmanager_arn = data.aws_secretsmanager_secret_version.app_creds.arn
175+
})
176+
}

terraform-aws-github-runner/modules/runners/variables.tf

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,12 @@ variable "scale_up_chron_schedule_expression" {
100100
default = "cron(*/10 * * * ? *)" # every 10 minutes
101101
}
102102

103+
# Schedule on which the scale-cycle lambda's event rule fires.
# Description reconciled with the root module's variable of the same name.
variable "scale_cycle_schedule_expression" {
104+
description = "Scheduler expression for the scale cycle lambda (runner reuse via EBS root volume replacement)."
105+
type = string
106+
default = "cron(*/5 * * * ? *)" # every 5 minutes
107+
}
108+
103109
variable "minimum_running_time_in_minutes" {
104110
description = "The time an ec2 action runner should be running at minimum before terminated if non busy."
105111
type = number
@@ -124,6 +130,12 @@ variable "lambda_timeout_scale_up_chron" {
124130
default = 900
125131
}
126132

133+
# Used both as the lambda's timeout and as its LAMBDA_TIMEOUT environment variable.
variable "lambda_timeout_scale_cycle" {
134+
description = "Time out for the scale cycle lambda in seconds."
135+
type = number
136+
default = 900
137+
}
138+
127139
variable "role_permissions_boundary" {
128140
description = "Permissions boundary that will be added to the created role for the lambda."
129141
type = string
@@ -323,3 +335,9 @@ variable "retry_scale_up_chron_hud_query_url" {
323335
description = "URL used in scale-up-chron to query HUD for queued jobs, if empty scale up cron will not run."
324336
type = string
325337
}
338+
339+
# Master switch: every scale_cycle resource in this module is gated on it via count.
# Description reconciled with the root module's variable of the same name.
variable "enable_scale_cycle" {
340+
description = "Enable the scale cycle lambda for runner reuse (EBS root volume replacement)."
341+
type = bool
342+
default = false
343+
}

terraform-aws-github-runner/variables.tf

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -374,6 +374,24 @@ variable "retry_scale_up_chron_hud_query_url" {
374374
default = ""
375375
}
376376

377+
# Off by default; forwarded unchanged to module "runners" as enable_scale_cycle.
variable "enable_scale_cycle" {
378+
description = "Enable the scale cycle lambda for runner reuse."
379+
type = bool
380+
default = false
381+
}
382+
383+
# Forwarded unchanged to module "runners" as scale_cycle_schedule_expression.
variable "scale_cycle_schedule_expression" {
384+
description = "Scheduler expression for runner reuse cycle."
385+
type = string
386+
default = "cron(*/5 * * * ? *)" # every 5 minutes
387+
}
388+
389+
# Forwarded unchanged to module "runners" as lambda_timeout_scale_cycle.
# NOTE(review): the default 900 s (15 min) exceeds the default 5-minute schedule interval,
# so a long run could overlap the next invocation — confirm the handler tolerates that.
variable "lambda_timeout_scale_cycle" {
390+
description = "Time out for the scale cycle lambda in seconds."
391+
type = number
392+
default = 900
393+
}
394+
377395
variable "wiz_secret_arn" {
378396
description = "ARN of AWS Secrets Manager secret that the runner role should have access to"
379397
type = string

0 commit comments

Comments
 (0)