Skip to content

Commit a591aa8

Browse files
authored
Merge pull request #398 from broadinstitute/development
Release 1.42.0
2 parents 9cde31b + 07a3ae6 commit a591aa8

File tree

14 files changed

+16
-387
lines changed

14 files changed

+16
-387
lines changed

.github/workflows/minify_ontologies.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ on:
1010
jobs:
1111
build:
1212
runs-on: ubuntu-latest
13+
permissions: write-all
1314

1415
steps:
1516
- name: Checkout code

ingest/cell_metadata.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@
2626
from validation.validate_metadata import (
2727
report_issues,
2828
validate_input_metadata,
29-
write_metadata_to_bq,
3029
)
3130
except ImportError:
3231
# Used when importing as external package, e.g. imports in single_cell_portal code
@@ -180,8 +179,7 @@ def conforms_to_metadata_convention(self):
180179
json_file = convention_file_object.open_file(self.JSON_CONVENTION)
181180
convention = json.load(json_file)
182181

183-
import_to_bq = self.kwargs["bq_dataset"] and self.kwargs["bq_table"]
184-
validate_input_metadata(self, convention, bq_json=import_to_bq)
182+
validate_input_metadata(self, convention)
185183

186184
json_file.close()
187185
return not report_issues(self)

ingest/cli_parser.py

Lines changed: 0 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -3,39 +3,12 @@
33
import argparse
44
import ast
55

6-
from google.cloud import bigquery
76
from google.cloud.exceptions import NotFound
87

98

109
# Ingest file types
1110
EXPRESSION_FILE_TYPES = ["dense", "mtx", "h5ad"]
1211

13-
14-
def bq_dataset_exists(dataset):
15-
bigquery_client = bigquery.Client()
16-
dataset_ref = bigquery_client.dataset(dataset)
17-
exists = False
18-
try:
19-
bigquery_client.get_dataset(dataset_ref)
20-
exists = True
21-
except NotFound:
22-
print(f"Dataset {dataset} not found")
23-
return exists
24-
25-
26-
def bq_table_exists(dataset, table):
27-
bigquery_client = bigquery.Client()
28-
dataset_ref = bigquery_client.dataset(dataset)
29-
table_ref = dataset_ref.table(table)
30-
exists = False
31-
try:
32-
bigquery_client.get_table(table_ref)
33-
exists = True
34-
except NotFound:
35-
print(f"Dataset {table} not found")
36-
return exists
37-
38-
3912
def validate_arguments(parsed_args):
4013
"""Verify parsed input arguments
4114
@@ -54,25 +27,6 @@ def validate_arguments(parsed_args):
5427
"must include .genes.tsv, and .barcodes.tsv files. See --help for "
5528
"more information"
5629
)
57-
if "ingest_cell_metadata" in parsed_args:
58-
if (parsed_args.bq_dataset is not None and parsed_args.bq_table is None) or (
59-
parsed_args.bq_dataset is None and parsed_args.bq_table is not None
60-
):
61-
raise ValueError(
62-
"Missing argument: --bq_dataset and --bq_table are both required for BigQuery upload."
63-
)
64-
if parsed_args.bq_dataset is not None and not bq_dataset_exists(
65-
parsed_args.bq_dataset
66-
):
67-
raise ValueError(
68-
f" Invalid argument: unable to connect to a BigQuery dataset called {parsed_args.bq_dataset}."
69-
)
70-
if parsed_args.bq_table is not None and not bq_table_exists(
71-
parsed_args.bq_dataset, parsed_args.bq_table
72-
):
73-
raise ValueError(
74-
f" Invalid argument: unable to connect to a BigQuery table called {parsed_args.bq_table}."
75-
)
7630
if (
7731
"differential_expression" in parsed_args
7832
and parsed_args.annotation_type != "group"
@@ -191,12 +145,6 @@ def create_parser():
191145
required=True,
192146
help="Single study accession associated with ingest files.",
193147
)
194-
parser_cell_metadata.add_argument(
195-
"--bq-dataset", help="BigQuery dataset identifer for ingest job."
196-
)
197-
parser_cell_metadata.add_argument(
198-
"--bq-table", help="BigQuery table identifer for ingest job."
199-
)
200148
parser_cell_metadata.add_argument(
201149
"--ingest-cell-metadata",
202150
required=True,

ingest/ingest_pipeline.py

Lines changed: 3 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,10 @@
2121
2222
# Ingest Cell Metadata file against convention
2323
!! Please note that you must have a pre-configured BigQuery table available
24-
python ingest_pipeline.py --study-id 5d276a50421aa9117c982845 --study-file-id 5dd5ae25421aa910a723a337 ingest_cell_metadata --cell-metadata-file ../tests/data/annotation/metadata/convention/valid_no_array_v2.0.0.txt --study-accession SCP123 --ingest-cell-metadata --validate-convention --bq-dataset cell_metadata --bq-table alexandria_convention
24+
python ingest_pipeline.py --study-id 5d276a50421aa9117c982845 --study-file-id 5dd5ae25421aa910a723a337 ingest_cell_metadata --cell-metadata-file ../tests/data/annotation/metadata/convention/valid_no_array_v2.0.0.txt --study-accession SCP123 --ingest-cell-metadata --validate-convention
2525
26-
# Ingest Cell Metadata file against convention AND booleanize has_<modality> metadata for BigQuery
27-
#### BQ schema must be updated for each has_<modality>
28-
python ingest_pipeline.py --study-id addedfeed000000000000000 --study-file-id dec0dedfeed1111111111111 ingest_cell_metadata --cell-metadata-file ../tests/data/annotation/metadata/convention/brain_rf1/patchseq_classic_metadata_has_modality_10.tsv --study-accession SCPPR344 --ingest-cell-metadata --validate-convention --has-modality "['electrophysiology', 'morphology']" --bq-dataset cell_metadata_development --bq-table alexandria_convention
26+
# Ingest Cell Metadata file against convention with has_<modality> metadata
27+
python ingest_pipeline.py --study-id addedfeed000000000000000 --study-file-id dec0dedfeed1111111111111 ingest_cell_metadata --cell-metadata-file ../tests/data/annotation/metadata/convention/brain_rf1/patchseq_classic_metadata_has_modality_10.tsv --study-accession SCPPR344 --ingest-cell-metadata --validate-convention --has-modality "['electrophysiology', 'morphology']"
2928
3029
# Ingest dense file
3130
python ingest_pipeline.py --study-id 5d276a50421aa9117c982845 --study-file-id 5dd5ae25421aa910a723a337 ingest_expression --taxon-name 'Homo sapiens' --taxon-common-name human --ncbi-taxid 9606 --matrix-file ../tests/data/dense_matrix_19_genes_1000_cells.txt --matrix-file-type dense
@@ -113,7 +112,6 @@
113112
from validation.validate_metadata import (
114113
report_issues,
115114
validate_input_metadata,
116-
write_metadata_to_bq,
117115
)
118116
from cell_metadata import CellMetadata
119117
from cli_parser import create_parser, validate_arguments
@@ -347,27 +345,6 @@ def get_cluster_query(self):
347345

348346
return query
349347

350-
def upload_metadata_to_bq(self):
351-
"""Uploads metadata to BigQuery"""
352-
if self.kwargs["validate_convention"] is not None:
353-
if (
354-
self.kwargs["validate_convention"]
355-
and self.kwargs["bq_dataset"]
356-
and self.kwargs["bq_table"]
357-
):
358-
write_status = write_metadata_to_bq(
359-
self.cell_metadata,
360-
self.kwargs["bq_dataset"],
361-
self.kwargs["bq_table"],
362-
)
363-
return write_status
364-
else:
365-
IngestPipeline.dev_logger.error(
366-
"Erroneous call to upload_metadata_to_bq"
367-
)
368-
return 1
369-
return 0
370-
371348
@custom_metric(config.get_metric_properties)
372349
def ingest_expression(self) -> int:
373350
"""
@@ -681,12 +658,6 @@ def run_ingest(ingest, arguments, parsed_args):
681658
config.set_parent_event_name("ingest-pipeline:cell_metadata:ingest")
682659
status_cell_metadata_validation = ingest.validate_cell_metadata()
683660
status.append(status_cell_metadata_validation)
684-
if (
685-
parsed_args.bq_table is not None
686-
and status_cell_metadata_validation == 0
687-
):
688-
status_metadata_bq = ingest.upload_metadata_to_bq()
689-
status.append(status_metadata_bq)
690661
if status_cell_metadata_validation == 0:
691662
if ingest.kwargs['has_modality'] is not None:
692663
ingest.cell_metadata.file = CellMetadata.restore_modality_metadata(

ingest/validation/metadata_validation.py

Lines changed: 2 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,6 @@
1313
# generate an issues.json file to compare with reference test files
1414
$ python3 metadata_validation.py --issues-json ../../tests/data/annotation/metadata/convention/valid_no_array_v2.0.0.tsv
1515
16-
# generate a BigQuery upload file to compare with reference test files
17-
$ python3 metadata_validation.py --bq-json ../../tests/data/annotation/metadata/convention/valid_no_array_v2.0.0.tsv
18-
1916
# use a different metadata convention for validation
2017
$ python3 metadata_validation.py --convention <path to convention json> ../../tests/data/annotation/metadata/convention/valid_no_array_v2.0.0.tsv
2118
@@ -37,12 +34,10 @@
3734
import copy
3835
import itertools
3936
import math
40-
import pandas as pd
4137

4238
import colorama
4339
from colorama import Fore
4440
import jsonschema
45-
from google.cloud import bigquery
4641

4742
sys.path.append("..")
4843
try:
@@ -51,7 +46,6 @@
5146
from validation.validate_metadata import (
5247
report_issues,
5348
validate_input_metadata,
54-
write_metadata_to_bq,
5549
serialize_issues,
5650
exit_if_errors,
5751
)
@@ -84,12 +78,9 @@ def create_parser():
8478
# to generate reference output for tests
8579
parser.add_argument("--issues-json", action="store_true")
8680
# helper param to create JSON representation of convention metadata
87-
# to generate json for bigquery testing
88-
parser.add_argument("--bq-json", action="store_true")
8981
# overwrite existing output
9082
parser.add_argument("--force", action="store_true")
91-
# test BigQuery upload functions
92-
parser.add_argument("--upload", action="store_true")
83+
9384
# validate_metadata.py CLI only for dev, bogus defaults below shouldn't propagate
9485
# make bogus defaults obviously artificial for ease of detection
9586
parser.add_argument(
@@ -105,12 +96,6 @@ def create_parser():
10596
parser.add_argument(
10697
"--study-accession", help="SCP study accession", default="SCPtest"
10798
)
108-
parser.add_argument(
109-
"--bq-dataset", help="BigQuery dataset identifier", default="cell_metadata"
110-
)
111-
parser.add_argument(
112-
"--bq-table", help="BigQuery table identifier", default="alexandria_convention"
113-
)
11499
parser.add_argument(
115100
"--convention",
116101
help="Metadata convention JSON file",
@@ -120,24 +105,9 @@ def create_parser():
120105
return parser
121106

122107

123-
def check_if_old_output():
124-
"""Exit if old output files found"""
125-
output_files = ["bq.json"]
126-
127-
old_output = False
128-
for file in output_files:
129-
if os.path.exists(file):
130-
print(f"{file} already exists, please delete file and try again")
131-
old_output = True
132-
if old_output:
133-
exit(1)
134-
135-
136108
if __name__ == "__main__":
137109
args = create_parser().parse_args()
138110
arguments = vars(args)
139-
if not args.force:
140-
check_if_old_output()
141111

142112
with open(args.convention, "r") as f:
143113
convention = json.load(f)
@@ -150,10 +120,8 @@ def check_if_old_output():
150120
metadata.preprocess(True)
151121
print("Validating", args.input_metadata)
152122

153-
validate_input_metadata(metadata, convention, args.bq_json)
123+
validate_input_metadata(metadata, convention)
154124
if args.issues_json:
155125
serialize_issues(metadata)
156126
report_issues(metadata)
157-
if args.upload:
158-
write_metadata_to_bq(metadata, args.bq_dataset, args.bq_table)
159127
exit_if_errors(metadata)
536 Bytes
Binary file not shown.
1.08 KB
Binary file not shown.
36 Bytes
Binary file not shown.
442 Bytes
Binary file not shown.
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
1744734811 # validation cache key
1+
1749563342 # validation cache key

0 commit comments

Comments (0)