
Commit 7769f63

Merge pull request #20553 from mvdbeek/live_fix_cry_emoji

[25.0] Propagate cached job output replacement to copies of outputs

2 parents 02179ac + b691b93

3 files changed: +79 −15
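The gist of the fix, as a minimal framework-free sketch (toy `Dataset` and `HDA` classes here, not the Galaxy model): when job caching swaps a previously computed dataset in behind an output HDA, the swap must also reach every HDA that was already copied from that output, e.g. one placed into a collection.

```python
# Toy model of the propagation added to HistoryDatasetAssociation.copy_from.


class Dataset:
    def __init__(self, file_name):
        self.file_name = file_name


class HDA:
    def __init__(self, dataset):
        self.dataset = dataset
        self.copied_to = []  # HDAs that were copied from this one

    def copy(self):
        child = HDA(self.dataset)
        self.copied_to.append(child)
        return child

    def copy_from(self, other):
        self.dataset = other.dataset
        # The fix: recurse into copies so they pick up the replacement too.
        for copied in self.copied_to:
            copied.copy_from(self)


output = HDA(Dataset("placeholder.dat"))
in_collection = output.copy()   # copy made before the cached job resolves
cached = HDA(Dataset("cached_output.dat"))
output.copy_from(cached)        # cached job replaces the output dataset
assert in_collection.dataset.file_name == "cached_output.dat"
```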

lib/galaxy/model/__init__.py

Lines changed: 30 additions & 14 deletions

@@ -127,6 +127,7 @@
     reconstructor,
     registry,
     relationship,
+    remote,
     validates,
 )
 from sqlalchemy.orm.attributes import flag_modified
@@ -5475,6 +5476,7 @@ class HistoryDatasetAssociation(DatasetInstance, HasTags, Dictifiable, UsesAnnot
     dataset_id: Mapped[Optional[int]]
     hidden_beneath_collection_instance: Mapped[Optional["HistoryDatasetCollectionAssociation"]]
     tags: Mapped[List["HistoryDatasetAssociationTagAssociation"]]
+    copied_to_history_dataset_associations: Mapped[List["HistoryDatasetAssociation"]]

     def __init__(
         self,
@@ -5562,6 +5564,9 @@ def copy_from(self, other_hda, new_dataset=None, include_tags=True, include_meta
             self.copy_tags_from(self.user, other_hda)
         self.dataset = new_dataset or other_hda.dataset
         self.copied_from_history_dataset_association_id = other_hda.id
+        for copied_hda in self.copied_to_history_dataset_associations:
+            copied_hda.copy_from(self, include_tags=include_tags, include_metadata=include_metadata)
+
         if old_dataset:
             old_dataset.full_delete()
@@ -11946,15 +11951,34 @@ def __repr__(self):
             lazy="joined",
             back_populates="history_associations",
         ),
+        copied_to_history_dataset_associations=relationship(
+            "HistoryDatasetAssociation",
+            primaryjoin=lambda: and_(
+                HistoryDatasetAssociation.id
+                == remote(HistoryDatasetAssociation.copied_from_history_dataset_association_id),
+                # Include dataset_id, not technically necessary but allows filtering early
+                # and avoid the need for an index on copied_from_history_dataset_association_id
+                HistoryDatasetAssociation.dataset_id == remote(HistoryDatasetAssociation.dataset_id),
+            ),
+            remote_side=lambda: [
+                HistoryDatasetAssociation.copied_from_history_dataset_association_id,
+                HistoryDatasetAssociation.dataset_id,
+            ],
+            back_populates="copied_from_history_dataset_association",
+        ),
         copied_from_history_dataset_association=relationship(
-            HistoryDatasetAssociation,
-            primaryjoin=(
-                HistoryDatasetAssociation.table.c.copied_from_history_dataset_association_id
-                == HistoryDatasetAssociation.table.c.id
+            "HistoryDatasetAssociation",
+            primaryjoin=lambda: and_(
+                HistoryDatasetAssociation.copied_from_history_dataset_association_id
+                == remote(HistoryDatasetAssociation.id),
+                HistoryDatasetAssociation.dataset_id == remote(HistoryDatasetAssociation.dataset_id),
             ),
-            remote_side=[HistoryDatasetAssociation.table.c.id],
-            uselist=False,
+            remote_side=lambda: [
+                HistoryDatasetAssociation.id,
+                HistoryDatasetAssociation.dataset_id,
+            ],
             back_populates="copied_to_history_dataset_associations",
+            uselist=False,
         ),
         copied_from_library_dataset_dataset_association=relationship(
             LibraryDatasetDatasetAssociation,
@@ -11964,14 +11988,6 @@ def __repr__(self):
             ),
             back_populates="copied_to_history_dataset_associations",
         ),
-        copied_to_history_dataset_associations=relationship(
-            HistoryDatasetAssociation,
-            primaryjoin=(
-                HistoryDatasetAssociation.table.c.copied_from_history_dataset_association_id
-                == HistoryDatasetAssociation.table.c.id
-            ),
-            back_populates="copied_from_history_dataset_association",
-        ),
         copied_to_library_dataset_dataset_associations=relationship(
             LibraryDatasetDatasetAssociation,
             primaryjoin=(

lib/galaxy/model/store/__init__.py

Lines changed: 13 additions & 1 deletion

@@ -531,6 +531,17 @@ def handle_dataset_object_edit(dataset_instance, dataset_attrs):

         model_class = dataset_attrs.get("model_class", "HistoryDatasetAssociation")
         if model_class == "HistoryDatasetAssociation":
+            # Check if this HDA should reuse a dataset from a copied-from HDA
+            reuse_dataset = None
+            copied_from_chain = dataset_attrs.get("copied_from_history_dataset_association_id_chain", [])
+            if copied_from_chain:
+                # Look for the source HDA in the current import set
+                copied_from_key = _copied_from_object_key(copied_from_chain, object_import_tracker.hdas_by_key)
+                if copied_from_key and copied_from_key in object_import_tracker.hdas_by_key:
+                    source_hda = object_import_tracker.hdas_by_key[copied_from_key]
+                    # Reuse the dataset from the source HDA
+                    reuse_dataset = source_hda.dataset
+
             # Create dataset and HDA.
             dataset_instance = model.HistoryDatasetAssociation(
                 name=dataset_attrs["name"],
@@ -545,7 +556,8 @@ def handle_dataset_object_edit(dataset_instance, dataset_attrs):
                 tool_version=metadata.get("tool_version"),
                 metadata_deferred=metadata_deferred,
                 history=history,
-                create_dataset=True,
+                create_dataset=reuse_dataset is None,
+                dataset=reuse_dataset,
                 flush=False,
                 sa_session=self.sa_session,
             )
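On the import side, the change reuses the source HDA's backing dataset instead of creating a fresh one whenever the copied-from chain resolves to an HDA in the same import. A hypothetical miniature with plain dicts (`make_hda`, `copied_from_chain`, and the key scheme are illustrative only, not the Galaxy store API):

```python
# Reuse the backing dataset when the copied-from source is part of the same
# import, mirroring create_dataset=reuse_dataset is None above.


class Dataset:
    pass


def make_hda(attrs, hdas_by_key):
    reuse_dataset = None
    # Walk the copied-from chain looking for a source HDA already imported.
    for key in attrs.get("copied_from_chain", []):
        source = hdas_by_key.get(key)
        if source is not None:
            reuse_dataset = source["dataset"]
            break
    hda = {
        "name": attrs["name"],
        # Only mint a new dataset when nothing could be reused.
        "dataset": reuse_dataset if reuse_dataset is not None else Dataset(),
    }
    hdas_by_key[attrs["key"]] = hda
    return hda


tracker = {}
src = make_hda({"key": "hda1", "name": "a", "copied_from_chain": []}, tracker)
cpy = make_hda({"key": "hda2", "name": "a copy", "copied_from_chain": ["hda1"]}, tracker)
assert cpy["dataset"] is src["dataset"]  # the copy shares the source's dataset
```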

lib/galaxy_test/api/test_tools.py

Lines changed: 36 additions & 0 deletions

@@ -1057,6 +1057,42 @@ def test_run_cat1_use_cached_job(self):
         assert len(filenames) == 3, filenames
         assert len(set(filenames)) <= 2, filenames

+    @skip_without_tool("cat1")
+    @requires_new_history
+    def test_run_cat1_use_cached_job_build_list(self):
+        with self.dataset_populator.test_history_for(self.test_run_cat1_use_cached_job) as history_id:
+            # Run simple non-upload tool with an input data parameter.
+            inputs = self._get_cat1_inputs(history_id)
+            outputs_one = self._run_cat1(history_id, inputs=inputs, assert_ok=True, wait_for_job=True)
+            outputs_two = self._run_cat1(
+                history_id, inputs=inputs, use_cached_job=False, assert_ok=True, wait_for_job=True
+            )
+            # Rename inputs. Job should still be cached since cat1 doesn't look at name attribute
+            self.dataset_populator.rename_dataset(inputs["input1"]["id"])
+            outputs_three = self._run_cat1(
+                history_id, inputs=inputs, use_cached_job=True, assert_ok=False, wait_for_job=False
+            ).json()
+            outputs_four = self._run(
+                "__BUILD_LIST__",
+                history_id=history_id,
+                inputs={"datasets_0|input": {"src": "hda", "id": outputs_three["outputs"][0]["id"]}},
+            ).json()
+            self.dataset_populator.wait_for_job(outputs_three["jobs"][0]["id"])
+            dataset_details = []
+            for output in [outputs_one, outputs_two, outputs_three]:
+                output_id = output["outputs"][0]["id"]
+                dataset_details.append(self._get(f"datasets/{output_id}").json())
+                assert self._get(f"jobs/{output['jobs'][0]['id']}/metrics").json()
+            filenames = [dd["file_name"] for dd in dataset_details]
+            assert len(filenames) == 3, filenames
+            assert len(set(filenames)) <= 2, filenames
+            hdca = self.dataset_populator.get_history_collection_details(
+                history_id, content_id=outputs_four["output_collections"][0]["id"]
+            )
+            assert self.dataset_populator.get_history_dataset_content(
+                history_id, content_id=hdca["elements"][0]["object"]["id"]
+            )
+
     @skip_without_tool("cat_list")
     @skip_without_tool("__SORTLIST__")
     def test_run_cat_list_hdca_sort_order_respecrted_use_cached_job(self):