Skip to content

Commit 58bb467

Browse files
Merge pull request #144 from pepkit/dev
Release 0.12.8
2 parents 89c0a73 + a0ff9dc commit 58bb467

21 files changed

+113
-85
lines changed

.github/workflows/black.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,6 @@ jobs:
66
lint:
77
runs-on: ubuntu-latest
88
steps:
9-
- uses: actions/checkout@v2
10-
- uses: actions/setup-python@v2
9+
- uses: actions/checkout@v4
10+
- uses: actions/setup-python@v5
1111
- uses: psf/black@stable

.github/workflows/python-publish.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,4 +24,4 @@ jobs:
2424
run: |
2525
python setup.py sdist bdist_wheel
2626
- name: Publish package distributions to PyPI
27-
uses: pypa/gh-action-pypi-publish@release/v1
27+
uses: pypa/gh-action-pypi-publish@release/v1

MANIFEST.in

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
include requirements/*
22
include README.md
33
include docs/img/geofetch_logo.svg
4-
include geofetch/config_template.yaml
5-
include geofetch/config_processed_template.yaml
6-
include geofetch/looper_sra_convert.yaml
4+
include geofetch/templates/*
5+
include geofetch/templates/config_template.yaml
6+
include geofetch/templates/config_processed_template.yaml
7+
include geofetch/templates/looper_sra_convert.yaml
8+
include geofetch/templates/looper_config_template.yaml
9+
include geofetch/templates/pipeline_interface_convert.yaml

README.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,12 @@ or install the latest version from the GitHub repository:
4444
pip install git+https://github.com/pepkit/geofetch.git
4545
```
4646

47+
## All GEO projects (GSE + GSM) in PEP format
48+
49+
All GEO projects are available on PEPhub under the `geo` namespace: https://pephub.databio.org/geo/
50+
Users can search for GEO projects using the search bar, or download an archive of all GEO PEPs from the archive section of the namespace:
51+
[https://pephub.databio.org/geo?view=archive](https://pephub.databio.org/geo?view=archive)
52+
4753

4854
## How to cite:
4955
https://doi.org/10.1093/bioinformatics/btad069

geofetch/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
""" Package-level data """
1+
"""Package-level data"""
22

33
import coloredlogs
44
import logmuse

geofetch/_version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.12.7"
1+
__version__ = "0.12.8"

geofetch/cli.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import os
33

44
import logmuse
5+
from ubiquerg import VersionInHelpParser
56

67
from geofetch._version import __version__
78

@@ -15,7 +16,7 @@ def _parse_cmdl(cmdl):
1516
"""
1617
parser
1718
"""
18-
parser = argparse.ArgumentParser(
19+
parser = VersionInHelpParser(
1920
description="Automatic GEO and SRA data downloader",
2021
usage="""geofetch [<args>]
2122
@@ -26,15 +27,12 @@ def _parse_cmdl(cmdl):
2627
geofetch -i GSE67303 --processed --geo-folder <folder> -m <folder>
2728
2829
""",
30+
version=__version__,
2931
)
3032

3133
processed_group = parser.add_argument_group("processed")
3234
raw_group = parser.add_argument_group("raw")
3335

34-
parser.add_argument(
35-
"-V", "--version", action="version", version=f"%(prog)s {__version__}"
36-
)
37-
3836
# Required
3937
parser.add_argument(
4038
"-i",

geofetch/const.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,9 +44,14 @@
4444

4545
NEW_GENOME_COL_NAME = "ref_genome"
4646

47+
TEMPLATES_DIR = "templates"
4748
CONFIG_PROCESSED_TEMPLATE_NAME = "config_processed_template.yaml"
4849
CONFIG_RAW_TEMPLATE_NAME = "config_template.yaml"
49-
CONFIG_SRA_TEMPLATE = "looper_sra_convert.yaml"
50+
CONFIG_SRA_TEMPLATE_NAME = "looper_sra_convert.yaml"
51+
PIPELINE_INTERFACE_CONVERT_TEMPLATE_NAME = "pipeline_interface_convert.yaml"
52+
LOOPER_SRA_CONVERT = "looper_config_template.yaml"
53+
# SRA_CONVERT_SCHEMA_NAME = "sra_convert_schema.yaml"
54+
# RESOURCES_NAME = "resources.tsv"
5055

5156
# const for Finder:
5257
RETMAX = 10000000 # once it should be increased
@@ -63,3 +68,5 @@
6368
'+AND+("{start_date}"[Publication%20Date]%20:%20"{end_date}"[Publication%20Date])'
6469
)
6570
THREE_MONTH_FILTER = '+AND+"published+last+3+months"[Filter]'
71+
72+
LOOPER_CONFIG_FILE_NAME = "looper_config.yaml"

geofetch/geofetch.py

Lines changed: 61 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
from geofetch.const import (
2121
CONFIG_PROCESSED_TEMPLATE_NAME,
2222
CONFIG_RAW_TEMPLATE_NAME,
23-
CONFIG_SRA_TEMPLATE,
23+
CONFIG_SRA_TEMPLATE_NAME,
2424
EXP_SUPP_METADATA_FILE,
2525
EXPERIMENT_PATTERN,
2626
FILE_RAW_NAME_SAMPLE_PATTERN,
@@ -34,6 +34,10 @@
3434
SAMPLE_SUPP_METADATA_FILE,
3535
SER_SUPP_FILE_PATTERN,
3636
SUPP_FILE_PATTERN,
37+
TEMPLATES_DIR,
38+
PIPELINE_INTERFACE_CONVERT_TEMPLATE_NAME,
39+
LOOPER_SRA_CONVERT,
40+
LOOPER_CONFIG_FILE_NAME,
3741
)
3842
from geofetch.utils import (
3943
Accession,
@@ -867,6 +871,8 @@ def _expand_metadata_list(self, metadata_list: list) -> list:
867871
_LOGGER.info("Expanding metadata list...")
868872
list_of_keys = _get_list_of_keys(metadata_list)
869873
for key_in_list in list_of_keys:
874+
if key_in_list == "Sample_characteristics_ch1":
875+
pass
870876
metadata_list = self._expand_metadata_list_item(metadata_list, key_in_list)
871877
return metadata_list
872878

@@ -881,7 +887,13 @@ def _expand_metadata_list_item(self, metadata_list: list, dict_key: str):
881887
"""
882888
try:
883889
element_is_list = any(
884-
isinstance(list_item.get(dict_key), list) for list_item in metadata_list
890+
isinstance(list_item.get(dict_key), list)
891+
or (
892+
len(list_item.get(dict_key).split(": ")) == 2
893+
if list_item.get(dict_key)
894+
else False
895+
)
896+
for list_item in metadata_list
885897
)
886898

887899
# # checking if some items have two keys:
@@ -900,6 +912,8 @@ def _expand_metadata_list_item(self, metadata_list: list, dict_key: str):
900912
metadata_list[n_elem][dict_key] = [
901913
metadata_list[n_elem][dict_key]
902914
]
915+
else:
916+
pass
903917

904918
just_string = False
905919
this_string = ""
@@ -1087,7 +1101,7 @@ def _find_genome(metadata_list: list) -> list:
10871101
sample_genome = ""
10881102
for key in proj_gen_keys:
10891103
sample_genome = " ".join([sample_genome, sample[1][key]])
1090-
metadata_list[sample[0]][NEW_GENOME_COL_NAME] = sample_genome
1104+
metadata_list[sample[0]][NEW_GENOME_COL_NAME] = sample_genome.strip()
10911105
return metadata_list
10921106

10931107
def _write_raw_annotation_new(
@@ -1161,11 +1175,43 @@ def _write_raw_annotation_new(
11611175
if len(subannot_dict) > 0:
11621176
self._write_subannotation(subannot_dict, proj_root_subsample)
11631177

1164-
self._write(proj_root_yaml, template, msg_pre=" Config file: ")
1178+
self._write(proj_root_yaml, template, msg_pre="Config file: ")
11651179

11661180
if self.add_dotfile:
11671181
_create_dot_yaml(dot_yaml_path, yaml_name)
11681182

1183+
if self.add_convert_modifier:
1184+
geofetchdir = os.path.dirname(__file__)
1185+
pipeline_interface_convert_path = os.path.join(
1186+
geofetchdir, TEMPLATES_DIR, PIPELINE_INTERFACE_CONVERT_TEMPLATE_NAME
1187+
)
1188+
1189+
looper_config_template_path = os.path.join(
1190+
geofetchdir, TEMPLATES_DIR, LOOPER_SRA_CONVERT
1191+
)
1192+
1193+
with open(looper_config_template_path, "r") as template_file:
1194+
template_looper = template_file.read()
1195+
1196+
template_values = {
1197+
"pep_config": proj_root_yaml,
1198+
"output_dir": os.path.join(self.metadata_root_full, "output_dir"),
1199+
"pipeline_interface_convert": pipeline_interface_convert_path,
1200+
}
1201+
1202+
for k, v in template_values.items():
1203+
placeholder = "{" + str(k) + "}"
1204+
template_looper = template_looper.replace(placeholder, str(v))
1205+
1206+
looper_config_file = os.path.join(
1207+
self.metadata_root_full,
1208+
LOOPER_CONFIG_FILE_NAME,
1209+
)
1210+
1211+
self._write(
1212+
looper_config_file, template_looper, msg_pre="Looper config file: "
1213+
)
1214+
11691215
else:
11701216
meta_df = pd.DataFrame.from_dict(metadata_dict, orient="index")
11711217

@@ -1204,8 +1250,11 @@ def _create_config_processed(
12041250
:param meta_in_series:
12051251
:return: generated, complete config file content
12061252
"""
1253+
12071254
geofetchdir = os.path.dirname(__file__)
1208-
config_template = os.path.join(geofetchdir, CONFIG_PROCESSED_TEMPLATE_NAME)
1255+
config_template = os.path.join(
1256+
geofetchdir, TEMPLATES_DIR, CONFIG_PROCESSED_TEMPLATE_NAME
1257+
)
12091258
with open(config_template, "r") as template_file:
12101259
template = template_file.read()
12111260
meta_list_str = [
@@ -1260,9 +1309,13 @@ def _create_config_raw(
12601309
else:
12611310
sample_modifier_str = ""
12621311
if not self.config_template:
1263-
self.config_template = os.path.join(geofetchdir, CONFIG_RAW_TEMPLATE_NAME)
1312+
self.config_template = os.path.join(
1313+
geofetchdir, TEMPLATES_DIR, CONFIG_RAW_TEMPLATE_NAME
1314+
)
12641315
if self.add_convert_modifier:
1265-
sra_convert_path = os.path.join(geofetchdir, CONFIG_SRA_TEMPLATE)
1316+
sra_convert_path = os.path.join(
1317+
geofetchdir, TEMPLATES_DIR, CONFIG_SRA_TEMPLATE_NAME
1318+
)
12661319
with open(sra_convert_path, "r") as template_file:
12671320
sra_convert_template = template_file.read()
12681321
else:
@@ -1291,6 +1344,7 @@ def _create_config_raw(
12911344
for k, v in template_values.items():
12921345
placeholder = "{" + str(k) + "}"
12931346
template = template.replace(placeholder, str(v))
1347+
12941348
return template
12951349

12961350
@staticmethod

geofetch/sraconvert.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
import os
44
import sys
5-
from argparse import ArgumentParser
5+
from ubiquerg import VersionInHelpParser
66

77
import logmuse
88
import pypiper
@@ -15,7 +15,7 @@ def _parse_cmdl(cmdl):
1515
provides convenience functions for converting or deleting sra data in
1616
various formats.
1717
"""
18-
parser = ArgumentParser(description=description)
18+
parser = VersionInHelpParser(description=description)
1919
# parser = pypiper.add_pypiper_args(parser, args=["output-parent"])
2020
parser.add_argument(
2121
"-m",
@@ -72,6 +72,9 @@ def _parse_cmdl(cmdl):
7272
help="Name for sample to run",
7373
metavar="SAMPLE_NAME",
7474
)
75+
parser.add_argument(
76+
"-V", "--version", action="version", version=f"%(prog)s {__version__}"
77+
)
7578

7679
parser.add_argument("-r", "--srr", required=True, nargs="+", help="SRR files")
7780

0 commit comments

Comments
 (0)