Skip to content

Commit 8da26bf

Browse files
committed
feat(agent): change to distroless and use python for chroot and chmod
1 parent e460a65 commit 8da26bf

File tree

5 files changed

+318
-346
lines changed

5 files changed

+318
-346
lines changed

agent/.dockerignore

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
venv
2+
root_mount
3+
root
4+
5+
__pycache__
6+
.pytest_cache
7+
dist
8+
.coverage
9+
.coverage.*
10+
coverage.xml
11+
test-results.xml
12+
13+
vendor/lock_file.bak

agent/skyhook-agent/src/skyhook_agent/controller.py

Lines changed: 46 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -20,23 +20,9 @@
2020
# LICENSE END
2121
#
2222

23-
24-
25-
26-
27-
28-
29-
30-
31-
32-
33-
34-
35-
36-
37-
3823
import sys
3924
import os
25+
import stat
4026
import subprocess
4127
import base64
4228
import asyncio
@@ -129,21 +115,23 @@ async def tee(cmd: List[str], stdout_sink_path: str, stderr_sink_path: str, writ
129115
def get_host_path_for_steps(copy_dir: str):
130116
return f"{copy_dir}/skyhook_dir"
131117

132-
def get_skyhook_directory(root_mount: str) -> str:
133-
return f"{root_mount}/etc/skyhook"
118+
def get_skyhook_directory() -> str:
119+
return f"/etc/skyhook"
134120

121+
def get_flag_dir() -> str:
122+
return f"{get_skyhook_directory()}/flags"
135123

136-
def get_flag_dir(root_mount: str) -> str:
137-
return f"{get_skyhook_directory(root_mount)}/flags"
124+
def get_history_dir() -> str:
125+
return f"{get_skyhook_directory()}/history"
138126

139-
def get_history_dir(root_mount: str) -> str:
140-
return f"{get_skyhook_directory(root_mount)}/history"
127+
def get_log_dir() -> str:
128+
return f"/var/log/skyhook"
141129

142-
def get_log_file(root_mount: str, step_path: str, copy_dir: str, config_data: dict, timestamp: str=None) -> str:
130+
def get_log_file(step_path: str, copy_dir: str, config_data: dict, timestamp: str=None) -> str:
143131
if timestamp is None:
144132
timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%d-%H%M%S")
145133
package_name, package_current_version = _get_package_information(config_data)
146-
log_file = f"{root_mount}/var/log/skyhook/{package_name}/{package_current_version}/{step_path.replace(get_host_path_for_steps(copy_dir), '')}-{timestamp}.log"
134+
log_file = f"{get_log_dir()}/{package_name}/{package_current_version}/{step_path.replace(get_host_path_for_steps(copy_dir), '')}-{timestamp}.log"
147135
os.makedirs(os.path.dirname(log_file), exist_ok=True)
148136
return log_file
149137

@@ -160,9 +148,9 @@ def cleanup_old_logs(log_file_glob: str) -> None:
160148

161149

162150
def make_flag_path(
163-
step: Step|UpgradeStep, root_mount: str, config_data: dict
151+
step: Step|UpgradeStep, config_data: dict
164152
) -> str:
165-
flag_dir = get_flag_dir(root_mount)
153+
flag_dir = get_flag_dir()
166154
package_name, package_current_version = _get_package_information(config_data)
167155
marker = base64.b64encode(bytes(f"{step.arguments}_{step.returncodes}", "utf-8")).decode("utf-8")
168156
return f"{flag_dir}/{package_name}/{package_current_version}/{step.path}_{marker}"
@@ -174,14 +162,14 @@ def set_flag(flag_file: str, msg: str = "") -> None:
174162
f.write(msg)
175163

176164

177-
def _run(cmds: list[str], log_path: str, on_host: bool, root_mount: str, write_cmds=False, **kwargs) -> int:
165+
def _run(cmds: list[str], log_path: str, write_cmds=False, **kwargs) -> int:
178166
"""
179167
Synchronous wrapper around the tee command to have logs written to disk
180168
"""
181169
# "tee" the stdout and stderr to a file to log the step results
182170
result = asyncio.run(
183171
tee(
184-
(["chroot", root_mount] if on_host else []) + cmds,
172+
cmds,
185173
log_path,
186174
f"{log_path}.err",
187175
write_cmds=write_cmds,
@@ -193,12 +181,11 @@ def _run(cmds: list[str], log_path: str, on_host: bool, root_mount: str, write_c
193181

194182
def run_step(
195183
step: Step|UpgradeStep,
196-
root_mount: str,
197184
copy_dir: str,
198185
config_data: dict
199186
) -> bool:
200187
"""
201-
Run the given Step via chroot on the given root_mount path if Step.on_host is True.
188+
Run the given Step.
202189
Any arguments for the step that start with "env:" will be sourced from their environment variable.
203190
Any environment variables that do not exist will fail the run.
204191
The following environment variables are also set into the steps execution environment:
@@ -207,7 +194,6 @@ def run_step(
207194
208195
Args:
209196
step(Step): Object of class Step.
210-
root_mount(str): Path to the mount dir
211197
copy_dir(str): Directory path containing all the step scripts.
212198
config_data(dict): The config data. Must contain package_name and package_version
213199
Returns: bool of return codes
@@ -233,21 +219,18 @@ def run_step(
233219
print(msg)
234220
return True
235221

236-
subprocess.run(
237-
(f"chroot {root_mount} " if step.on_host else "") + f"chmod +x {step_path}",
238-
shell=True,
239-
)
222+
# chmod +x the step
223+
os.chmod(step_path, os.stat(step_path).st_mode | stat.S_IXGRP | stat.S_IXUSR | stat.S_IXOTH)
224+
240225
time.sleep(1)
241-
log_file = get_log_file(root_mount, step_path, copy_dir, config_data)
226+
log_file = get_log_file(step_path, copy_dir, config_data)
242227
return_code = _run(
243228
[step_path, *step.arguments],
244229
log_file,
245-
on_host=step.on_host,
246-
root_mount=root_mount,
247230
# Make sure to include the original environment here or else things like path resolution don't work
248231
env=dict(**os.environ, **step.env, **{"STEP_ROOT": get_host_path_for_steps(copy_dir), "SKYHOOK_DIR": copy_dir}),)
249232

250-
cleanup_old_logs(get_log_file(root_mount, step_path, copy_dir, config_data, "*"))
233+
cleanup_old_logs(get_log_file(step_path, copy_dir, config_data, "*"))
251234
if return_code not in step.returncodes:
252235
print(f"FAILED: {step.path} {' '.join(step.arguments)} {return_code}")
253236
return True
@@ -282,12 +265,11 @@ def check_flag_file(
282265
return True
283266
return False
284267

285-
def get_or_update_history(root_mount: str, config_data: dict, write: bool = False, step: Step|UpgradeStep = None, mode: Mode = None) -> None:
268+
def get_or_update_history(config_data: dict, write: bool = False, step: Step|UpgradeStep = None, mode: Mode = None) -> None:
286269
"""
287270
Manages the history file for tracking version changes, and auditing purposes.
288271
289272
Args:
290-
root_mount (str): The root directory.
291273
write (bool): If True, updates the history file. If False, reads from the history file. Defaults to False.
292274
step (Step | UpgradeStep): The current step being processed. Required when reading the history.
293275
mode (Mode): The mode the controller is running in. Required when writing to the history.
@@ -302,7 +284,7 @@ def get_or_update_history(root_mount: str, config_data: dict, write: bool = Fals
302284
"""
303285
package_name, package_current_version = _get_package_information(config_data)
304286
# Create history dir if it doesn't already exist
305-
history_dir = get_history_dir(root_mount)
287+
history_dir = get_history_dir()
306288
os.makedirs(history_dir, exist_ok=True)
307289

308290
history_file = f"{history_dir}/{package_name}.json"
@@ -347,13 +329,13 @@ def get_or_update_history(root_mount: str, config_data: dict, write: bool = Fals
347329
if step and isinstance(step, UpgradeStep):
348330
step.arguments.extend([history_data["current-version"], package_current_version])
349331

350-
def summarize_check_results(results: list[bool], step_data: dict[Mode, list[Step|UpgradeStep]], step_selector: Mode, root_mount: str) -> bool:
332+
def summarize_check_results(results: list[bool], step_data: dict[Mode, list[Step|UpgradeStep]], step_selector: Mode, ) -> bool:
351333
"""
352334
Returning True means there is at least one failure
353335
"""
354-
flag_dir = get_flag_dir(root_mount)
355-
if not os.path.exists(flag_dir) or len(results) == 0:
356-
print("It does not look like you have successfully applied any steps yet.")
336+
flag_dir = get_flag_dir()
337+
if len(results) != len(step_data[step_selector]):
338+
print("It does not look like you have successfully run all check steps yet.")
357339
return True
358340

359341
# Any failure fails the whole thing
@@ -385,7 +367,7 @@ def make_config_data_from_resource_id() -> dict:
385367
}
386368
return config_data
387369

388-
def do_interrupt(interrupt_data: str, root_mount: str, copy_dir: str, on_host: bool) -> bool:
370+
def do_interrupt(interrupt_data: str, root_mount: str, copy_dir: str) -> bool:
389371
"""
390372
Run an interrupt if there hasn't been an interrupt already for the skyhook ID.
391373
"""
@@ -395,9 +377,9 @@ def do_interrupt(interrupt_data: str, root_mount: str, copy_dir: str, on_host: b
395377
config_data = make_config_data_from_resource_id()
396378

397379
interrupt = interrupts.inflate(interrupt_data)
398-
380+
os.chroot(root_mount)
399381
# Check if the interrupt has already been run for this particular skyhook resource
400-
interrupt_dir = f"{get_skyhook_directory(root_mount)}/interrupts/flags/{SKYHOOK_RESOURCE_ID}"
382+
interrupt_dir = f"{get_skyhook_directory()}/interrupts/flags/{SKYHOOK_RESOURCE_ID}"
401383
os.makedirs(interrupt_dir, exist_ok=True)
402384
for i, cmd in enumerate(interrupt.interrupt_cmd):
403385
interrupt_id = f"{interrupt._type()}_{i}"
@@ -412,9 +394,7 @@ def do_interrupt(interrupt_data: str, root_mount: str, copy_dir: str, on_host: b
412394

413395
return_code = _run(
414396
cmd,
415-
get_log_file(root_mount, f"interrupts/{interrupt_id}", copy_dir, config_data),
416-
root_mount=root_mount,
417-
on_host=on_host,
397+
get_log_file(f"interrupts/{interrupt_id}", copy_dir, config_data),
418398
write_cmds=True
419399
)
420400

@@ -428,9 +408,9 @@ def do_interrupt(interrupt_data: str, root_mount: str, copy_dir: str, on_host: b
428408
return False
429409

430410
## Remove all step flags after uninstall
431-
def remove_flags(step_data: dict[Mode, list[Step|UpgradeStep]], root_mount: str, config_data: dict) -> None:
411+
def remove_flags(step_data: dict[Mode, list[Step|UpgradeStep]], config_data: dict) -> None:
432412
for step in [step for steps in step_data.values() for step in steps]:
433-
flag_file = make_flag_path(step, root_mount, config_data)
413+
flag_file = make_flag_path(step, config_data)
434414
if os.path.exists(flag_file): # Check if the file exists before trying to remove it
435415
os.remove(flag_file)
436416

@@ -440,7 +420,7 @@ def main(mode: Mode, root_mount: str, copy_dir: str, interrupt_data: None|str, a
440420
return False
441421

442422
if mode == Mode.INTERRUPT:
443-
return do_interrupt(interrupt_data, root_mount, copy_dir, True)
423+
return do_interrupt(interrupt_data, root_mount, copy_dir)
444424

445425
_, SKYHOOK_DATA_DIR = _get_env_config()
446426

@@ -474,9 +454,9 @@ def agent_main(mode: Mode, root_mount: str, copy_dir: str, config_data: dict, in
474454

475455
# Pull out step_data so it matches with existing code
476456
step_data = config_data["modes"]
477-
457+
os.chroot(root_mount)
478458
# Make a flag to mark Skyhook has started
479-
set_flag(f"{get_flag_dir(root_mount)}/START")
459+
set_flag(f"{get_flag_dir()}/START")
480460
results = []
481461

482462
# If no steps are configured for this mode but it is being run anyway, output a warning that this is a no-op
@@ -487,19 +467,19 @@ def agent_main(mode: Mode, root_mount: str, copy_dir: str, config_data: dict, in
487467
# Make the flag file without the host path argument (first one). This is because in operator world
488468
# the host path is going to change every time the Skyhook Custom Resource changes so it would
489469
# look like a step hasn't been run when in fact it had.
490-
flag_file = make_flag_path(step, root_mount, config_data)
470+
flag_file = make_flag_path(step, config_data)
491471

492472
# If upgrading get the from and to versions from the history file
493473
# so it can be passed to the upgrade steps via args or environment vars
494474
if mode == Mode.UPGRADE or mode == Mode.UPGRADE_CHECK:
495-
get_or_update_history(root_mount, config_data, step=step)
475+
get_or_update_history(config_data, step=step)
496476

497477
if not str(mode).endswith("-check"):
498478
if check_flag_file(step, flag_file, always_run_step, mode):
499479
continue
500480
print(f"{mode} {step.path} {step.arguments} {step.returncodes} {step.idempotence} {step.on_host}")
501481

502-
failed = run_step(step, root_mount, copy_dir, config_data)
482+
failed = run_step(step, copy_dir, config_data)
503483
if failed:
504484
return True
505485

@@ -509,20 +489,20 @@ def agent_main(mode: Mode, root_mount: str, copy_dir: str, config_data: dict, in
509489
)
510490
else:
511491
print(f"{mode} {step.path} {step.arguments} {step.returncodes} {step.idempotence} {step.on_host}")
512-
results.append(run_step(step, root_mount, copy_dir, config_data))
492+
results.append(run_step(step, copy_dir, config_data))
513493

514494

515495
if mode in CHECK_TO_APPLY and len(step_data.get(mode, [])) > 0:
516-
if summarize_check_results(results, step_data, mode, root_mount):
496+
if summarize_check_results(results, step_data, mode):
517497
return True
518498

519499
## If APPLY_CHECK, UPGRADE_CHECK, or UNINSTALL_CHECK finished successfully update installed version history
520500
if mode in [Mode.APPLY_CHECK, Mode.UPGRADE_CHECK, Mode.UNINSTALL_CHECK]:
521-
get_or_update_history(root_mount, config_data, write=True, mode=mode)
501+
get_or_update_history(config_data, write=True, mode=mode)
522502

523503
## We also want to remove the flags if the package was uninstalled
524504
if mode == Mode.UNINSTALL_CHECK:
525-
remove_flags(step_data, root_mount, config_data)
505+
remove_flags(step_data, config_data)
526506

527507
return False
528508

@@ -577,10 +557,10 @@ def cli(sys_argv: list[str]=sys.argv):
577557
print(str.center("Directory CONFIGURATION", 20, "-"))
578558
# print flag dir and log dir
579559
config_data = make_config_data_from_resource_id()
580-
print(f"flag_dir: {get_flag_dir(root_mount)}/{config_data['package_name']}/{config_data['package_version']}")
581-
log_dir = '/'.join(get_log_file(root_mount, 'step',copy_dir, config_data, timestamp='timestamp').split('/')[:-1])
560+
print(f"flag_dir: {get_flag_dir()}/{config_data['package_name']}/{config_data['package_version']}")
561+
log_dir = '/'.join(get_log_file('step',copy_dir, config_data, timestamp='timestamp').split('/')[:-1])
582562
print(f"log_dir: {log_dir}")
583-
print(f"history_file: {get_history_dir(root_mount)}/{config_data['package_name']}.json")
563+
print(f"history_file: {get_history_dir()}/{config_data['package_name']}.json")
584564
print("-" * 20)
585565

586566
return main(mode, root_mount, copy_dir, interrupt_data, always_run_step)

0 commit comments

Comments
 (0)