Skip to content

Commit bfbb3c3

Browse files
committed
WIP: further progress with test
1 parent 7170049 commit bfbb3c3

File tree

7 files changed

+136
-4
lines changed

7 files changed

+136
-4
lines changed

pulsar/client/action_mapper.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -543,6 +543,9 @@ def __init__(self, source, file_lister=None, url=None):
543543
def from_dict(cls, action_dict):
544544
return JsonTransferAction(source=action_dict["source"], url=action_dict["url"])
545545

546+
def to_dict(self):
    """Return this action's dictionary form, including its ``url``.

    Delegates to ``_extend_base_dict`` with the ``url`` keyword; ``from_dict``
    reads the ``url`` key back when rebuilding the action.
    """
    action_dict = self._extend_base_dict(url=self.url)
    return action_dict
548+
546549
def write_to_path(self, path):
547550
self._to_path = path
548551

pulsar/client/client.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -498,6 +498,7 @@ def launch(
498498
# 4. stage outputs back using manifest [handled by ARC]
499499
pass
500500

501+
501502
class CoexecutionLaunchMixin(BaseRemoteConfiguredJobClient):
502503
execution_type: ExecutionType
503504
pulsar_container_image: str

pulsar/client/staging/up.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,13 @@ def submit_job(client, client_job_description, job_config=None):
7272
launch_kwds["dynamic_file_sources"] = client_job_description.client_outputs.dynamic_file_sources
7373
launch_kwds["token_endpoint"] = client.token_endpoint
7474

75+
# populate `to_path`
76+
for action in file_stager.action_mapper.actions:
77+
name = action.path
78+
input_type = "input"
79+
path = file_stager.job_directory.calculate_path(name, input_type)
80+
action.write_to_path(path)
81+
7582
staging_manifest = file_stager.action_mapper.finalize()
7683
if staging_manifest:
7784
launch_kwds["staging_manifest"] = staging_manifest

pulsar/scripts/staging_arc.py

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
#!/usr/bin/env python
2+
3+
"""Stage files in or out of a compute environment made available via the Advanced Resource Connector (ARC) [1].
4+
5+
This script reads a set of source and target URL (with `http`, `https` or `file` as URL scheme) and/or path pairs passed
6+
either as command line arguments and/or from a file in the form of a JSON array. It then reads the files from the source
7+
URLs and posts them to the target URLs (for `file://` targets, the files are copied instead).
8+
9+
Example usage:
10+
11+
```shell
12+
$ ./staging_arc.py --stage https://example.org file.dat --stage file:///home/user/text.txt https://example.org/files \
13+
--json staging_manifest.json
14+
```
15+
16+
_staging_manifest.json_
17+
```json
18+
[
19+
{
20+
"source": "file:///home/user/data.txt",
21+
"target": "file:///home/person/file.txt"
22+
},
23+
{
24+
"source": "file:///home/user/analysis.txt",
25+
"target": "https://example.org/files/analysis.txt"
26+
}
27+
]
28+
```
29+
30+
Retrieve files from a set of source URLs and save them to a set of target URLs.
31+
32+
References:
33+
- [1] https://www.nordugrid.org/arc/about-arc.html
34+
"""
35+
36+
# When the URL is the target, use POST.
37+
38+
import aiohttp
39+
import json
40+
import sys
41+
from typing import Iterable
42+
from typing import Literal
43+
from dataclasses import dataclass, field
44+
from argparse import ArgumentParser
45+
from typing import Optional
46+
47+
48+
@dataclass
class StagingDeclaration:
    """Declare where to read a file from and where to save it to."""

    source: str  # a URL
    target: str  # a URL


...  # placeholder kept from the work-in-progress original


def parse_json_manifest() -> tuple[StagingDeclaration, ...]:
    """Parse a JSON staging manifest into staging declarations.

    Not implemented yet: the body is a stub and currently returns ``None``.

    The return annotation is a variable-length tuple
    (``tuple[StagingDeclaration, ...]``); a bare ``tuple[StagingDeclaration]``
    would describe a tuple of exactly one element, whereas a manifest is a JSON
    array holding any number of ``source``/``target`` entries.
    """
    ...
61+
62+
63+
HELP_STAGE = "Read a file from `source` and save it to `target`."
HELP_JSON = "Read a list of `source` and `target` URLs from a JSON file."


def make_parser() -> ArgumentParser:
    """Construct an argument parser used to call the script from the command line.

    The module docstring is used as the parser description. It contains
    multi-line shell and JSON examples, so it is rendered verbatim with
    ``RawDescriptionHelpFormatter``; argparse's default formatter would re-wrap
    it into a single paragraph and make the examples unreadable.

    Returns:
        A parser accepting two repeatable options:

        * ``--stage source target`` -> ``args.stage`` is a list of
          ``[source, target]`` pairs (``None`` when the option is absent).
        * ``--json file`` -> ``args.json`` is a list of one-element lists,
          because the option combines ``nargs=1`` with ``action="append"``
          (``None`` when the option is absent).
    """
    # Imported locally so this function does not depend on a change to the
    # module-level import block.
    from argparse import RawDescriptionHelpFormatter

    module_docstring = sys.modules[__name__].__doc__

    parser = ArgumentParser(
        description=module_docstring,
        formatter_class=RawDescriptionHelpFormatter,
    )

    parser.add_argument(
        "--stage", dest="stage", metavar=("source", "target"), nargs=2, action="append", help=HELP_STAGE
    )
    parser.add_argument("--json", dest="json", nargs=1, action="append", help=HELP_JSON)

    return parser
80+
81+
82+
if __name__ == "__main__":
    # Invoke script from the command line.
    # (A string literal here would not be a docstring, only a no-op expression
    # statement, so the note is kept as a comment.)
    argument_parser = make_parser()
    args = argument_parser.parse_args(sys.argv[1:])

pulsar/scripts/submit.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ def main(args=None):
1515
add_common_submit_args(arg_parser)
1616
arg_parser.add_argument('--wait', action='store_true')
1717
arg_parser.add_argument('--no_wait', "--no-wait", dest='wait', action='store_false')
18+
arg_parser.add_argument('--build_client_manager', action='store_true')
1819
arg_parser.set_defaults(wait=True)
1920
args = arg_parser.parse_args(args)
2021
run_server_for_job(args)

pulsar/scripts/submit_util.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@
44
import logging
55
import time
66

7+
from pulsar.client import ClientJobDescription, ClientOutputs, ClientInput
8+
from pulsar.client import submit_job as submit_client_job
9+
from pulsar.client.manager import build_client_manager
710
from pulsar.client.util import from_base64_json
811
from pulsar.main import (
912
load_pulsar_app,
@@ -32,6 +35,23 @@ def run_server_for_job(args):
3235
submit_job(manager, job_config)
3336
if wait:
3437
log.info("Co-execution job setup, now waiting for job completion and postprocessing.")
38+
if args.build_client_manager:
39+
client_manager = build_client_manager(arc_enabled=True)
40+
client = client_manager.get_client({"arc_url": "http://localhost:8082", "jobs_directory": app.staging_directory}, job_id=job_config["job_id"], default_file_action=job_config["remote_staging"]["action_mapper"]["default_action"], files_endpoint=job_config["remote_staging"]["action_mapper"]["files_endpoint"])
41+
# FIXME: we can probably only test the input staging here, so adjust tests accordingly
42+
client_inputs = [
43+
ClientInput(path=action_source["path"], input_type="input_path")
44+
for action_source in job_config["remote_staging"]["client_inputs"]
45+
]
46+
client_outputs = ClientOutputs.from_dict(job_config["remote_staging"]["client_outputs"])
47+
job_description = ClientJobDescription(
48+
command_line=job_config["command_line"],
49+
working_directory=client_outputs.working_directory,
50+
client_inputs=client_inputs,
51+
client_outputs=client_outputs,
52+
)
53+
job_config["working_directory"] = client_outputs.working_directory
54+
submit_client_job(client, job_description)
3555
wait_for_job(manager, job_config)
3656
log.info("Leaving finish_execution and shutting down app")
3757
except BaseException:

test/test_cli_submit.py

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
temp_directory_persist,
1010
)
1111

12-
from pulsar.client import ClientOutputs
12+
from pulsar.client import ClientOutputs, ClientInput
1313
from pulsar.client.util import to_base64_json
1414
from pulsar.scripts import submit
1515

@@ -25,19 +25,33 @@ def setup_action_mapper(self, files_endpoint):
2525
def run_and_check_submission(self):
2626
job_id = "0"
2727
galaxy_working = temp_directory_persist()
28+
input_name = "dataset_1.dat"
2829
output_name = "dataset_1211231231231231231.dat"
30+
galaxy_input = os.path.join(galaxy_working, input_name)
31+
with open(galaxy_input, "w") as handle:
32+
handle.write("cow_file_contents\n")
2933
galaxy_output = os.path.join(galaxy_working, output_name)
34+
pulsar_input = os.path.join(self.staging_directory, job_id, "inputs", input_name)
3035
pulsar_output = os.path.join(
3136
self.staging_directory, job_id, "outputs", output_name
3237
)
33-
pulsar_input = os.path.join(self.staging_directory, job_id, "inputs", "cow")
3438
with files_server("/") as test_files_server:
3539
files_endpoint = test_files_server.application_url
3640
action = {
3741
"name": "cow",
3842
"type": "input",
39-
"action": {"action_type": "message", "contents": "cow file contents\n"},
43+
"action": {
44+
"action_type": "json_transfer",
45+
"files_endpoint": files_endpoint,
46+
"path": galaxy_input,
47+
},
4048
}
49+
client_inputs = [
50+
ClientInput(
51+
path=galaxy_input,
52+
input_type="input_path",
53+
).action_source
54+
]
4155
client_outputs = ClientOutputs(
4256
working_directory=galaxy_working,
4357
output_files=[os.path.join(galaxy_working, output_name)],
@@ -52,12 +66,13 @@ def run_and_check_submission(self):
5266
remote_staging={
5367
"setup": [action],
5468
"action_mapper": self.setup_action_mapper(files_endpoint),
69+
"client_inputs": client_inputs,
5570
"client_outputs": client_outputs.to_dict(),
5671
},
5772
)
5873
base64 = to_base64_json(launch_params)
5974
assert not os.path.exists(galaxy_output)
60-
submit.main(["--build_client_manager", "true", "--base64", base64] + self._encode_application())
75+
submit.main(["--build_client_manager", "--base64", base64] + self._encode_application())
6176
assert os.path.exists(galaxy_output)
6277
out_contents = open(galaxy_output).read()
6378
assert out_contents == "cow file contents\n", out_contents

0 commit comments

Comments
 (0)