Skip to content

Commit 71085e2

Browse files
committed
Draft of JSON manifest collector
This should be the general strategy for collecting input and output files for ARC, DIRAC, AWS batch etc.
1 parent 198dc8a commit 71085e2

File tree

9 files changed

+168
-13
lines changed

9 files changed

+168
-13
lines changed

pulsar/client/action_mapper.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,9 @@ def __init__(self, client=None, config=None):
190190
self.ssh_port = config.get("ssh_port", None)
191191
self.mappers = mappers_from_dicts(config.get("paths", []))
192192
self.files_endpoint = config.get("files_endpoint", None)
193+
self.actions = []
194+
# Might want to make the working directory available here so that we know where to place the archive
195+
# for the archive action
193196

194197
def action(self, source, type, mapper=None):
195198
path = source.get("path", None)
@@ -202,8 +205,12 @@ def action(self, source, type, mapper=None):
202205
action_kwds = mapper.action_kwds
203206
action = action_class(source, file_lister=file_lister, **action_kwds)
204207
self.__process_action(action, type)
208+
self.actions.append(action)
205209
return action
206210

211+
def finalize(self):
212+
return [_ for _ in (action.finalize() for action in self.actions) if _]
213+
207214
def unstructured_mappers(self):
208215
""" Return mappers that will map 'unstructured' files (i.e. go beyond
209216
mapping inputs, outputs, and config files).
@@ -342,6 +349,9 @@ def _extend_base_dict(self, **kwds):
342349
base_dict.update(**kwds)
343350
return base_dict
344351

352+
def finalize(self):
353+
pass
354+
345355
def to_dict(self):
346356
return self._extend_base_dict()
347357

@@ -513,6 +523,35 @@ def write_from_path(self, pulsar_path):
513523
tus_upload_file(self.url, pulsar_path)
514524

515525

526+
class JsonTransferAction(BaseAction):
527+
"""
528+
This action indicates that the pulsar server should create a JSON manifest that can be used to stage files by an
529+
external system that can stage files in and out of the compute environment.
530+
"""
531+
inject_url = True
532+
whole_directory_transfer_supported = True
533+
action_type = "json_transfer"
534+
staging = STAGING_ACTION_REMOTE
535+
536+
def __init__(self, source, file_lister=None, url=None):
537+
super().__init__(source, file_lister)
538+
self.url = url
539+
self._path = None
540+
541+
@classmethod
542+
def from_dict(cls, action_dict):
543+
return JsonTransferAction(source=action_dict["source"], url=action_dict["url"])
544+
545+
def write_to_path(self, path):
546+
self._path = path
547+
548+
def write_from_path(self, pulsar_path: str):
549+
self._path = pulsar_path
550+
551+
def finalize(self):
552+
return {"url": self.url, "path": self.path}
553+
554+
516555
class RemoteObjectStoreCopyAction(BaseAction):
517556
"""
518557
"""
@@ -664,6 +703,7 @@ def write_to_path(self, path):
664703

665704

666705
DICTIFIABLE_ACTION_CLASSES = [
706+
JsonTransferAction,
667707
RemoteCopyAction,
668708
RemoteTransferAction,
669709
RemoteTransferTusAction,
@@ -844,6 +884,7 @@ def unstructured_map(self, path):
844884

845885
ACTION_CLASSES: List[Type[BaseAction]] = [
846886
NoneAction,
887+
JsonTransferAction,
847888
RewriteAction,
848889
TransferAction,
849890
CopyAction,

pulsar/client/client.py

Lines changed: 29 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,7 @@ def __init__(self, destination_params, job_id, job_manager_interface):
168168
self.job_manager_interface = job_manager_interface
169169

170170
def launch(self, command_line, dependencies_description=None, env=None, remote_staging=None, job_config=None,
171-
dynamic_file_sources=None, token_endpoint=None):
171+
dynamic_file_sources=None, token_endpoint=None, staging_manifest=None):
172172
"""
173173
Queue up the execution of the supplied `command_line` on the remote
174174
server. Called launch for historical reasons, should be renamed to
@@ -405,7 +405,7 @@ def _build_status_request_message(self):
405405
class MessageJobClient(BaseMessageJobClient):
406406

407407
def launch(self, command_line, dependencies_description=None, env=None, remote_staging=None, job_config=None,
408-
dynamic_file_sources=None, token_endpoint=None):
408+
dynamic_file_sources=None, token_endpoint=None, staging_manifest=None):
409409
"""
410410
"""
411411
launch_params = self._build_setup_message(
@@ -439,7 +439,7 @@ def __init__(self, destination_params, job_id, client_manager, shell):
439439
self.shell = shell
440440

441441
def launch(self, command_line, dependencies_description=None, env=None, remote_staging=None, job_config=None,
442-
dynamic_file_sources=None, token_endpoint=None):
442+
dynamic_file_sources=None, token_endpoint=None, staging_manifest=None):
443443
"""
444444
"""
445445
launch_params = self._build_setup_message(
@@ -477,6 +477,24 @@ class ExecutionType(str, Enum):
477477
PARALLEL = "parallel"
478478

479479

480+
class LocalSequentialLaunchMixin(BaseRemoteConfiguredJobClient):
481+
482+
def launch(
483+
self,
484+
command_line,
485+
dependencies_description=None,
486+
env=None,
487+
remote_staging=None,
488+
job_config=None,
489+
dynamic_file_sources=None,
490+
container_info=None,
491+
token_endpoint=None,
492+
pulsar_app_config=None,
493+
staging_manifest=None
494+
) -> Optional[ExternalId]:
495+
pass
496+
497+
480498
class CoexecutionLaunchMixin(BaseRemoteConfiguredJobClient):
481499
execution_type: ExecutionType
482500
pulsar_container_image: str
@@ -491,7 +509,8 @@ def launch(
491509
dynamic_file_sources=None,
492510
container_info=None,
493511
token_endpoint=None,
494-
pulsar_app_config=None
512+
pulsar_app_config=None,
513+
staging_manifest=None
495514
) -> Optional[ExternalId]:
496515
"""
497516
"""
@@ -756,6 +775,12 @@ def raw_check_complete(self) -> Dict[str, Any]:
756775
}
757776

758777

778+
class LocalSequentialClient(BaseMessageCoexecutionJobClient, LocalSequentialLaunchMixin):
779+
780+
def __init__(self, destination_params, job_id, client_manager):
781+
super().__init__(destination_params, job_id, client_manager)
782+
783+
759784
class TesPollingCoexecutionJobClient(BasePollingCoexecutionJobClient, LaunchesTesContainersMixin):
760785
"""A client that co-executes pods via GA4GH TES and depends on amqp for status updates."""
761786

pulsar/client/staging/down.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,9 @@ def collect(self):
7272
self.__collect_other_working_directory_files()
7373
self.__collect_metadata_directory_files()
7474
self.__collect_job_directory_files()
75+
# Give actions that require a final step, like those that write a manifest, a chance to write out their content
76+
self.__finalize_action_mapper()
77+
# TODO: finalize collection here for executors that need this?
7578
return self.exception_tracker.collection_failure_exceptions
7679

7780
def __collect_working_directory_outputs(self):
@@ -134,6 +137,9 @@ def __collect_job_directory_files(self):
134137
'output_jobdir',
135138
)
136139

140+
def __finalize_action_mapper(self):
141+
self.action_mapper.finalize()
142+
137143
def __realized_dynamic_file_source_references(self):
138144
references = {"filename": [], "extra_files": []}
139145

pulsar/client/staging/up.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,11 @@ def submit_job(client, client_job_description, job_config=None):
7171
# it needs to be in the response to Pulsar even if Pulsar is initiating staging actions
7272
launch_kwds["dynamic_file_sources"] = client_job_description.client_outputs.dynamic_file_sources
7373
launch_kwds["token_endpoint"] = client.token_endpoint
74+
75+
staging_manifest = file_stager.action_mapper.finalize()
76+
if staging_manifest:
77+
launch_kwds["staging_manifest"] = staging_manifest
78+
7479
# for pulsar modalities that skip the explicit "setup" step, give them a chance to set an external
7580
# id from the submission process (e.g. to TES).
7681
launch_response = client.launch(**launch_kwds)

pulsar/managers/staging/pre.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ def preprocess(job_directory, setup_actions, action_executor, object_store=None)
1414
action = from_dict(setup_action["action"])
1515
if getattr(action, "inject_object_store", False):
1616
action.object_store = object_store
17+
if getattr(action, "inject_job_directory", False):
18+
action.job_directory = job_directory
1719
path = job_directory.calculate_path(name, input_type)
1820
description = "Staging {} '{}' via {} to {}".format(input_type, name, action, path)
1921
log.debug(description)

test/action_mapper_test.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,17 @@
44
)
55

66

7+
def test_action_mapper_finalization():
8+
client = _client("json_transfer")
9+
mapper = FileActionMapper(client)
10+
mapper.action({'path': '/opt/galaxy/tools/filters/catWrapper.py'}, 'input')
11+
mapper.action({'path': '/the_file'}, 'input')
12+
mapper_summary = mapper.finalize()
13+
assert len(mapper_summary) == 2
14+
assert mapper_summary[0]["path"] == '/opt/galaxy/tools/filters/catWrapper.py'
15+
assert mapper_summary[1]["path"] == '/the_file'
16+
17+
718
def test_endpoint_validation():
819
client = _min_client("remote_transfer")
920
mapper = FileActionMapper(client)

test/client_test.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,10 @@ def test_launch():
125125
request_checker.assert_called()
126126

127127

128+
def test_sequential_local():
129+
pass
130+
131+
128132
def __test_upload(upload_type):
129133
client = TestClient()
130134
(temp_fileno, temp_file_path) = tempfile.mkstemp()

test/integration_test_cli_submit.py renamed to test/test_cli_submit.py

Lines changed: 41 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,16 +16,28 @@
1616

1717
class BaseCliTestCase(TempDirectoryTestCase):
1818

19+
def setup_action_mapper(self, files_endpoint):
20+
return {
21+
"default_action": "remote_transfer",
22+
"files_endpoint": files_endpoint,
23+
}
24+
1925
def run_and_check_submission(self):
2026
job_id = "0"
2127
galaxy_working = temp_directory_persist()
2228
output_name = "dataset_1211231231231231231.dat"
2329
galaxy_output = os.path.join(galaxy_working, output_name)
24-
pulsar_output = os.path.join(self.staging_directory, job_id, "outputs", output_name)
30+
pulsar_output = os.path.join(
31+
self.staging_directory, job_id, "outputs", output_name
32+
)
2533
pulsar_input = os.path.join(self.staging_directory, job_id, "inputs", "cow")
2634
with files_server("/") as test_files_server:
2735
files_endpoint = test_files_server.application_url
28-
action = {"name": "cow", "type": "input", "action": {"action_type": "message", "contents": "cow file contents\n"}}
36+
action = {
37+
"name": "cow",
38+
"type": "input",
39+
"action": {"action_type": "message", "contents": "cow file contents\n"},
40+
}
2941
client_outputs = ClientOutputs(
3042
working_directory=galaxy_working,
3143
output_files=[os.path.join(galaxy_working, output_name)],
@@ -39,10 +51,7 @@ def run_and_check_submission(self):
3951
setup=True,
4052
remote_staging={
4153
"setup": [action],
42-
"action_mapper": {
43-
"default_action": "remote_transfer",
44-
"files_endpoint": files_endpoint,
45-
},
54+
"action_mapper": self.setup_action_mapper(files_endpoint),
4655
"client_outputs": client_outputs.to_dict(),
4756
},
4857
)
@@ -74,7 +83,8 @@ def test(self):
7483
def _encode_application(self):
7584
app_conf = dict(
7685
staging_directory=self.staging_directory,
77-
message_queue_url="memory://submittest"
86+
message_queue_url="memory://submittest",
87+
conda_auto_init=False,
7888
)
7989
app_conf_path = os.path.join(self.config_directory, "app.yml")
8090
with open(app_conf_path, "w") as f:
@@ -93,6 +103,29 @@ def test(self):
93103
def _encode_application(self):
94104
app_conf = dict(
95105
staging_directory=self.staging_directory,
96-
message_queue_url="memory://submittest"
106+
message_queue_url="memory://submittest",
107+
conda_auto_init=False,
108+
)
109+
return ["--app_conf_base64", to_base64_json(app_conf)]
110+
111+
112+
class SequentialLocalCommandLineAppConfigTestCase(BaseCliTestCase):
113+
114+
@skip_unless_module("kombu")
115+
@integration_test
116+
def test(self):
117+
self.run_and_check_submission()
118+
119+
def setup_action_mapper(self, files_endpoint):
120+
return {
121+
"default_action": "json_transfer",
122+
"files_endpoint": files_endpoint,
123+
}
124+
125+
def _encode_application(self):
126+
app_conf = dict(
127+
staging_directory=self.staging_directory,
128+
message_queue_url="memory://submittest",
129+
conda_auto_init=False,
97130
)
98131
return ["--app_conf_base64", to_base64_json(app_conf)]

test/transfer_action_test.py

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,35 @@
11
import os
22

33
from .test_utils import files_server
4-
from pulsar.client.action_mapper import RemoteTransferAction
4+
from pulsar.client.action_mapper import (
5+
JsonTransferAction,
6+
RemoteTransferAction,
7+
)
8+
9+
10+
def test_write_to_path_json():
11+
with files_server() as (server, directory):
12+
from_path = os.path.join(directory, "remote_get")
13+
14+
to_path = os.path.join(directory, "local_get")
15+
url = server.application_url + "?path=%s" % from_path
16+
action = JsonTransferAction({"path": to_path}, url=url)
17+
action.write_to_path(to_path)
18+
assert action.path == to_path
19+
assert action.url == url
20+
assert action.finalize() == {"path": to_path, "url": url}
21+
22+
23+
def test_write_from_file_json():
24+
with files_server() as (server, directory):
25+
from_path = os.path.join(directory, "local_post")
26+
to_path = os.path.join(directory, "remote_post")
27+
url = server.application_url + "?path=%s" % to_path
28+
action = JsonTransferAction({"path": to_path}, url=url)
29+
action.write_from_path(from_path)
30+
assert action.path == to_path
31+
assert action.url == url
32+
assert action.finalize() == {"path": to_path, "url": url}
533

634

735
def test_write_to_file():

0 commit comments

Comments
 (0)