@@ -11,7 +11,7 @@
 from rich.progress import track
 import re
 import logmuse
-from ubiquerg import expandpath, is_command_callable
+from ubiquerg import expandpath
 from typing import List, Union, Dict, Tuple, NoReturn
 import peppy
 import pandas as pd
@@ -59,6 +59,7 @@
     _filter_gsm,
     _unify_list_keys,
     gse_content_to_dict,
+    is_prefetch_callable,
 )

 _LOGGER = logging.getLogger(__name__)
@@ -371,10 +372,10 @@ def fetch_all(self, input: str, name: str = None) -> Union[NoReturn, peppy.Proje

         # check to make sure prefetch is callable
         if not self.just_metadata and not self.processed:
-            if not is_command_callable("prefetch"):
+            if not is_prefetch_callable():
                 raise SystemExit(
-                    "To download raw data You must first install the sratoolkit, with prefetch in your PATH."
-                    " Installation instruction: http://geofetch.databio.org/en/latest/install/"
+                    "To download raw data, you must first install the sratoolkit, with prefetch in your PATH. "
+                    "Installation instruction: http://geofetch.databio.org/en/latest/install/"
                 )

         acc_GSE_list = parse_accessions(
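The `is_prefetch_callable` helper that replaces the generic `is_command_callable("prefetch")` check is imported from the package's utils above, but its body is not part of this diff. A minimal sketch of what such a check might look like, assuming it only needs to confirm that the sratoolkit `prefetch` binary can be invoked (the actual implementation may differ):

import subprocess

def is_prefetch_callable() -> bool:
    # Hypothetical sketch: run `prefetch --version` and report whether the
    # binary is on PATH and exits cleanly.
    try:
        result = subprocess.run(
            ["prefetch", "--version"],
            capture_output=True,
            check=False,
        )
        return result.returncode == 0
    except (FileNotFoundError, OSError):
        return False

A dedicated check like this can confirm not only that a prefetch executable exists but that it actually starts, which a simple PATH lookup alone would not.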
@@ -546,9 +547,9 @@ def fetch_all(self, input: str, name: str = None) -> Union[NoReturn, peppy.Proje
                     name=self.project_name,
                     meta_processed_samples=processed_metadata_samples,
                     meta_processed_series=processed_metadata_series,
-                    gse_meta_dict=file_gse_content_dict
-                    if len(acc_GSE_list.keys()) == 1
-                    else None,
+                    gse_meta_dict=(
+                        file_gse_content_dict if len(acc_GSE_list.keys()) == 1 else None
+                    ),
                 )
                 if self.just_object:
                     return return_value
@@ -559,9 +560,9 @@ def fetch_all(self, input: str, name: str = None) -> Union[NoReturn, peppy.Proje
                     f"{self.project_name}_PEP",
                     metadata_dict_combined,
                     subannotation_dict_combined,
-                    gse_meta_dict=file_gse_content_dict
-                    if len(acc_GSE_list.keys()) == 1
-                    else None,
+                    gse_meta_dict=(
+                        file_gse_content_dict if len(acc_GSE_list.keys()) == 1 else None
+                    ),
                 )
                 if self.just_object:
                     return return_value
@@ -1036,7 +1037,7 @@ def _write_processed_annotation(
         )

         if not just_object:
-            with open(file_annotation_path, "w") as m_file:
+            with open(file_annotation_path, "w", encoding="utf-8") as m_file:
                 dict_writer = csv.DictWriter(m_file, processed_metadata[0].keys())
                 dict_writer.writeheader()
                 dict_writer.writerows(processed_metadata)
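Not part of this diff, but relevant to the open() call above: the csv module documentation recommends opening files handed to csv.DictWriter with newline="" so the writer does not emit extra blank lines on Windows. A small self-contained example combining both arguments (file name and sample data are hypothetical):

import csv

processed_metadata = [{"sample_name": "GSM0000001", "file": "counts.csv.gz"}]
# newline="" lets the csv module control line endings itself, avoiding the
# doubled \r\n (and resulting blank rows) that can appear on Windows.
with open("annotation.csv", "w", encoding="utf-8", newline="") as m_file:
    dict_writer = csv.DictWriter(m_file, processed_metadata[0].keys())
    dict_writer.writeheader()
    dict_writer.writerows(processed_metadata)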
@@ -1789,15 +1790,22 @@ def _download_processed_file(self, file_url: str, data_folder: str) -> bool:
                 return True

             except IOError as e:
-                _LOGGER.error(str(e))
-                # The server times out if we are hitting it too frequently,
-                # so we should sleep a bit to reduce frequency
-                sleeptime = (ntry + 1) ** 3
-                _LOGGER.info(f"Sleeping for {sleeptime} seconds")
-                time.sleep(sleeptime)
-                ntry += 1
-                if ntry > 4:
-                    raise e
+                if os.name == "nt":
+                    _LOGGER.error(f"{e}")
+                    raise OSError(
+                        "Windows may not have wget command. "
+                        "Check if `wget` command is installed correctly."
+                    )
+                else:
+                    _LOGGER.error(str(e))
+                    # The server times out if we are hitting it too frequently,
+                    # so we should sleep a bit to reduce frequency
+                    sleeptime = (ntry + 1) ** 3
+                    _LOGGER.info(f"Sleeping for {sleeptime} seconds")
+                    time.sleep(sleeptime)
+                    ntry += 1
+                    if ntry > 4:
+                        raise e

     def _get_SRA_meta(self, file_gse_content: list, gsm_metadata, file_sra=None):
         """
@@ -1865,12 +1873,13 @@ def _get_SRA_meta(self, file_gse_content: list, gsm_metadata, file_sra=None):
         else:
             # open existing annotation
             _LOGGER.info("Found SRA metadata, opening..")
-            with open(file_sra, "r") as m_file:
+            with open(file_sra, "r", encoding="UTF-8") as m_file:
                 reader = csv.reader(m_file)
                 file_list = []
                 srp_list = []
                 for k in reader:
-                    file_list.append(k)
+                    if k:
+                        file_list.append(k)
                 for value_list in file_list[1:]:
                     srp_list.append(dict(zip(file_list[0], value_list)))

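The guard added inside the reader loop skips blank rows, which csv.reader yields as empty lists, so that zipping the header row against each value row cannot produce malformed entries. The same pattern in isolation, with hypothetical function and file names:

import csv

def read_sra_annotation(path: str) -> list:
    # Hypothetical sketch: load an SRA annotation CSV as a list of dicts,
    # dropping blank rows before pairing header fields with values.
    with open(path, "r", encoding="UTF-8") as m_file:
        rows = [row for row in csv.reader(m_file) if row]
    header, value_rows = rows[0], rows[1:]
    return [dict(zip(header, value_list)) for value_list in value_rows]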