Develop #580

Open · wants to merge 8 commits into base: master
6 changes: 6 additions & 0 deletions isatools/__init__.py
@@ -42,6 +42,9 @@
sra2isatab as sra2isatab_module,
)

+from isatools.utils import (
+    detect_graph_process_pooling as detect_graph_process_pooling_module
+)

# isatools.convert packages
isatab2cedar = isatab2cedar_module
@@ -66,3 +69,6 @@
ols = ols_module
pubmed = pubmed_module
sra2isatab = sra2isatab_module

+# isatools.utils packages
+detect_graph_process_pooling = detect_graph_process_pooling_module
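For context, the aliasing above makes the utility importable from the package root as well as from isatools.utils. A minimal sketch (the assertion is illustrative, not part of this PR):

    from isatools import detect_graph_process_pooling
    from isatools.utils import detect_graph_process_pooling as direct_import

    # Both names are bound to the same object by the alias in __init__.py.
    assert detect_graph_process_pooling is direct_import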
1 change: 1 addition & 0 deletions isatools/convert/__init__.py
@@ -0,0 +1 @@
+from isatools.convert import json2sra
5 changes: 2 additions & 3 deletions isatools/examples/createSimpleISAJSON.py
@@ -133,7 +133,6 @@ def create_descriptor():
# Adding the description to the ISA Source Material:
source.characteristics.append(characteristic_organism)
study.sources.append(source)

# declaring a new ontology and adding it to the list of resources used
uberon = OntologySource(name='UBERON', description='Uber Anatomy Ontology')
investigation.ontology_source_references.append(uberon)
@@ -179,7 +178,7 @@ def create_descriptor():
f.comments.append(Comment(name="Study Start Date", value="Saturn"))
f.comments.append(Comment(name="Study End Date", value="2039-12-12"))
print(f.comments[0].name, "|", f.comments[0].value)

+    print(study.design_descriptors)
# checking that the ISA Factor object has been modified
study.factors.append(f)

@@ -255,7 +254,7 @@ def create_descriptor():
sequencing_process.name = "assay-name-{}".format(i)
sequencing_process.inputs.append(extraction_process.outputs[0])

-        # Sequencing process usually has an output data file
+        # Sequencing process usually has an output data file.

datafile = DataFile(filename="sequenced-data-{}".format(i),
label="Raw Data File", generated_from=[sample])
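For context on the example script touched above: Comment objects attach name/value annotations to any commentable ISA object, as the Study Start/End Date lines exercise. A condensed sketch, assuming the model classes are importable from isatools.model as in current releases:

    from isatools.model import Comment, StudyFactor

    f = StudyFactor(name='treatment')
    f.comments.append(Comment(name='Study Start Date', value='Saturn'))
    f.comments.append(Comment(name='Study End Date', value='2039-12-12'))
    print(f.comments[0].name, '|', f.comments[0].value)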
1 change: 1 addition & 0 deletions isatools/net/__init__.py
@@ -1 +1,2 @@
"""This package provides modules for using network services"""

4 changes: 3 additions & 1 deletion isatools/net/ax.py
@@ -29,7 +29,7 @@ def get(arrayexpress_id, target_dir=None):
This function downloads MAGE-TAB content from the ArrayExpress FTP site.

:param arrayexpress_id: Experiment identifier for ArrayExpress study to
-        get, as a str (e.g. E-GEOD-59671)
+        get, as a str (e.g., E-GEOD-59671)
:param target_dir: Path to write MAGE-TAB files to. If None, writes to
temporary directory (generated on the fly)
:return: Path where the files were written to
@@ -54,9 +54,11 @@ def get(arrayexpress_id, target_dir=None):
ftp.cwd('{base_dir}/{exp_type}/{arrayexpress_id}'.format(
base_dir=AX_EXPERIMENT_BASE_DIR, exp_type=exp_type,
arrayexpress_id=arrayexpress_id))
+        # this won't get set if there is no remote file or the ftp.cwd fails
if target_dir is None:
target_dir = tempfile.mkdtemp()
log.info("Using directory '{}'".format(target_dir))

idf_filename = "{}.idf.txt".format(arrayexpress_id)
with open(os.path.join(target_dir, idf_filename),
'wb') as out_file:
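For context on the hunk above: target_dir is resolved lazily, falling back to a throwaway temporary directory when the caller supplies none. A standalone sketch of that fallback pattern (the function name is illustrative, not from the module):

    import logging
    import tempfile

    log = logging.getLogger(__name__)

    def resolve_target_dir(target_dir=None):
        # When no destination is given, create a fresh temporary directory;
        # the caller is responsible for cleaning it up afterwards.
        if target_dir is None:
            target_dir = tempfile.mkdtemp()
        log.info("Using directory '%s'", target_dir)
        return target_dir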
8 changes: 4 additions & 4 deletions isatools/net/biocrates2isatab.py
@@ -50,7 +50,7 @@


def replaceAll(file, searchExp, replaceExp):
-    for line in fileinput.input(file, inplace=1):
+    for line in fileinput.input(file, inplace=True):
if searchExp in line:
line = line.replace(searchExp, replaceExp)
sys.stdout.write(line)
@@ -206,7 +206,7 @@ def biocrates_to_isatab_convert(biocrates_filename, saxon_jar_path=DEFAULT_SAXON
logger.debug(err)

with ZipFile(buffer, 'w') as zip_file:
-        # use relative dir_name to avoid absolute path on file names
+        # use relative dir_name to avoid an absolute path on file names
zipdir(dir_name, zip_file)
logger.debug("!", zip_file.namelist())

@@ -247,8 +247,8 @@ def generatePolarityAttrsDict(plate, polarity, myAttrs, myMetabolites, mydict):
myMblite = p.get('metabolite')
if myMblite not in myMetabolitesList:
myMetabolitesList.append(myMblite)
-        # it is assume that the rawdatafilename is unique in each of the
-        # plate grouping and polarity
+        # it is assumed that the rawdatafilename is unique in each of the
+        # plate groupings and polarity
myAttrs[pi.get('rawdatafilename').split('.')[0]] = myAttrList
myMetabolites[usedop + '-' + platebarcode + '-' + polarity.lower()] = myMetabolitesList
return myAttrs, mydict
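On the inplace=True fix in replaceAll above: fileinput.input(..., inplace=True) moves the input file to a backup and redirects sys.stdout into a replacement file of the same name, so whatever the loop writes becomes the rewritten content (inplace=1 worked only because the parameter is truthy-tested; the boolean states the intent). A minimal standalone sketch of the mechanism:

    import fileinput
    import sys

    def replace_all(path, search_exp, replace_exp):
        # Every line written inside this loop goes to the replacement file,
        # so untouched lines must be echoed back verbatim.
        for line in fileinput.input(path, inplace=True):
            sys.stdout.write(line.replace(search_exp, replace_exp))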
2 changes: 1 addition & 1 deletion isatools/net/sra2isatab.py
@@ -147,7 +147,7 @@ def sra_to_isatab_batch_convert(sra_acc_numbers, saxon_jar_path=DEFAULT_SAXON_EX
zipdir(dir_name, zip_file)

except subprocess.CalledProcessError as err:
-        log.error('isatools.convert.sra2isatab: CalledProcessError caught ', err.returncode)
+        log.error('isatools.net.sra2isatab: CalledProcessError caught ', err.returncode)
buffer.seek(0)
finally:
log.debug('Removing dir' + destination_dir)
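A side note on the corrected log.error call above: it still passes err.returncode as a positional argument with no matching placeholder in the message, which makes the logging module report an internal formatting error when the record is rendered. A sketch of the conventional lazy-formatting form (the simulated failure is illustrative):

    import logging
    import subprocess

    log = logging.getLogger(__name__)

    try:
        raise subprocess.CalledProcessError(2, 'xsl-transform')  # simulated failure
    except subprocess.CalledProcessError as err:
        # '%s' defers string interpolation until the record is emitted.
        log.error('isatools.net.sra2isatab: CalledProcessError caught %s',
                  err.returncode)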
64 changes: 34 additions & 30 deletions isatools/net/storage_adapter.py
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
"""Storage Adapter for accessing ISA content in Github"""
"""Storage Adapter for accessing ISA content in GitHub"""
import base64
import json
import logging
@@ -95,9 +95,9 @@ def __init__(self, username=None, password=None, note=None, scopes=('gist', 'rep
Initialize an ISA Storage Adapter to perform CRUD operations on a
remote GitHub repository

-        If credentlials are provided (username, password) th recommended use
-        is in a with command, to allow correct
-        authrization management.
+        If credentials are provided (username, password), the recommended use
+        is in a with command to allow correct
+        authorization management.
For instance:

with IsaGitHubStorageAdapter('user', 'passw', 'test auth') as adapter:
@@ -106,12 +106,11 @@ def __init__(self, username=None, password=None, note=None, scopes=('gist', 'rep
...

:param username: str - the (optional) GitHub user login
-        :param password: str - the (optional) Github password for user
-        :param note str - an (optional) note explaining the nature of the
-            authorizations.
+        :param password: str - the (optional) GitHub password for user
+        :param note str - an (optional) note explaining the nature of the authorizations
:param scopes tuple - a tuple containing the scopes
(see https://developer.github.com/v3/oauth/#scopes)
-            for the current authorization (if username and password are provided.
+            for the current authorization (if username and password are provided).
"""
self._authorization = {}
if username and password:
@@ -139,13 +138,15 @@ def __init__(self, username=None, password=None, note=None, scopes=('gist', 'rep

if res.status_code == requests.codes.created:
self._authorization = json.loads(res.text or res.content)
+        else:
+            raise Exception

def __enter__(self):
return self

def __exit__(self, exc_type, exc_val, exc_tb):
"""
-        Delete the authorization on destruction, if it was created by the
+        Delete the authorization on destruction if it was created by the
constructor
"""
self.close()
@@ -169,25 +170,27 @@ def close(self):
Method to delete the authorization, if it was created by the
constructor
"""

if self.is_authenticated:
headers = {'accept': 'application/json'}
r = requests.delete(self.authorization_uri, headers=headers, auth=(self._username, self._password))
log.debug(r)

return r.raise_for_status()

def download(self, source, destination='isa-target', owner='ISA-tools', repository='isa-api', validate_json=False):
"""
-        Call to download a resource from a remote GitHub repository
+        call to download a resource from a remote GitHub repository

:type source: str - URLish path to the source (within the GitHub
repository)
:type destination str
:type owner str
:type repository str
:type validate_json bool - if True perform validation against a
-            JSON schema (i.e. investigation schema). Valid only for JSON datasets
+            JSON schema (i.e., investigation schema). Valid only for JSON datasets.
"""
-        # get the content at source as raw data
+        # get the content at the source as raw data
get_content_frag = '/'.join([REPOS, owner, repository, CONTENTS, source])
headers = {'Authorization': 'token %s' % self.token, 'Accept': GITHUB_RAW_MEDIA_TYPE}
res = requests.get(urljoin(GITHUB_API_BASE_URL, get_content_frag), headers=headers)
@@ -203,7 +206,7 @@ def download(self, source, destination='isa-target', owner='ISA-tools', reposito
# then download all the items in the directory
return self._download_dir(source.split('/')[-1], destination, res_payload)

-        # if it is an object it's the file content to be stored
+        # if it is an object, it's the file content to be stored.
else:
# validate against JSON schema
if validate_json:
@@ -248,25 +251,24 @@ def retrieve(self, source, destination='isa-target', owner='ISA-tools',
Defaults to 'isa-api'
:param ref str - the name of commit/branch/tag. Defaults to
'master'
-        :param validate_json bool - if True perform validation against a
-            JSON schema (i.e. investigation schema). Valid only for JSON
-            datasets. Defaults to False
-        :param decode_content bool - if True it will decode the content
+        :param validate_json bool - if True, perform validation against a
+            JSON schema (i.e., investigation schema). Valid only for JSON
+            datasets. Default to False
+        :param decode_content bool - if True, it will decode the content
encoded in the payload, otherwise it will fire a second request to
-            retrieve the raw file. Defaults to True
+            retrieve the raw file. Default to True
:param write_to_file bool - if True writes the file to the
specified destination directory. Defaults to True

Returns:
-        :return dict - if the retrieved file contains a (valid) json
+        :return dict - if the retrieved file contains a (valid) JSON
document
:return XMLElement - if the retrieved file contains a valid ISA
XML configuration file
:return io.BytesIO - if the target is a directory of a ZIP file.
-            If it is a directory the zipped content of
-            it is returned as a binary stream.
+            If it is a directory, the zipped content of it is returned as a binary stream.
:return False - if the file downloaded is of an unauthorized type.
-            These file are not saved to disk
+            These files are not saved to disk

Raises:
:raise requests.exceptions.HTTPException when the request to
@@ -293,7 +295,7 @@ def retrieve(self, source, destination='isa-target', owner='ISA-tools',
return self._download_dir(source.split('/')[-1], destination,
res_payload, write_to_file)

-        # if it is an object decode the content (if the option is
+        # if it is an object, decodes the content (if the option is
# available)
elif decode_content:
processed_payload = self._handle_content(res_payload)
@@ -349,10 +351,10 @@ def _download_dir(self, directory, destination, dir_items, write_to_directory=No
for file in files:
file_name = file["name"]
res = requests.get(file['download_url'], headers=headers)
-            # if request went fine and the payload is a regular (ISA) text file write it to file
+            # if request went fine and the payload is a regular (ISA) text file, write it to file
if res.status_code == requests.codes.ok and res.headers['Content-Type'].split(";")[0] == 'text/plain':
# zip the text payload
-                zip_file.writestr(os.path.join(directory, file["name"]), res.text)
+                zip_file.writestr(os.path.join(directory, str(file["name"])), res.text)
# write to a target dir
if write_to_directory:
dir_path = os.path.join(destination, directory)
@@ -363,12 +365,14 @@ def _download_dir(self, directory, destination, dir_items, write_to_directory=No
buf.seek(0)
return buf

-    def _handle_content(self, payload, validate_json=False, char_set='utf-8'):
+    @staticmethod
+    def _handle_content(payload, validate_json=False, char_set='utf-8'):
"""
Handle file content, decoding its 'content' property, without firing
another GET request to GitHub
"""
# determine decoding strategy
+        decode_cmd = None
if payload['encoding'] == 'base64':
decode_cmd = base64.b64decode
elif payload['encoding'] == 'base32':
@@ -378,7 +382,7 @@ def _handle_content(self, payload, validate_json=False, char_set='utf-8'):
file_name = payload['name']
file_ext = file_name.split('.')[-1]

-        # if file is JSON
+        # if the file is JSON
if file_ext == 'json':
# try to parse the content as JSON and validate (if required)
decoded_content = decoded_content.decode(char_set)
Expand All @@ -387,14 +391,14 @@ def _handle_content(self, payload, validate_json=False, char_set='utf-8'):
validate_json_against_schema(json_content, INVESTIGATION_SCHEMA_FILE)
return {'json': json_content, 'text': decoded_content}

-        # if file is XML
+        # if the file is XML
elif file_ext == 'xml':
# try to parse the content as XML against configuration schema
decoded_content = decoded_content.decode(char_set)
xml = validate_xml_against_schema(decoded_content, CONFIGURATION_SCHEMA_FILE)
return {'xml': xml, 'text': decoded_content}

-        # if ZIP file return raw content
+        # if ZIP file, return raw content
elif file_ext == 'zip':
return {'content': decoded_content}

@@ -410,7 +414,7 @@ def _retrieve_file(self, file_uri, validate_json=False):
if r.status_code == requests.codes.ok:
content_type = r.headers['content-type'].split(';')[0]

-            # if content is a text file it might be a JSON or XML
+            # if content is a text file, it might be a JSON or XML
if content_type == 'text/plain':
try:
json_payload = json.loads(r.text or r.content)
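To make the authorization lifecycle described in the docstrings above concrete: entering the with block creates the OAuth authorization, and leaving it deletes the authorization again via __exit__ -> close(). A usage sketch (credentials and source path are hypothetical):

    from isatools.net.storage_adapter import IsaGitHubStorageAdapter

    with IsaGitHubStorageAdapter('user', 'password', 'test auth') as adapter:
        if adapter.is_authenticated:
            # retrieve() decodes JSON/XML payloads and writes them to disk
            # by default; see the parameter list in the diff above.
            adapter.retrieve('isa-json/BII-I-1', destination='isa-target')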
8 changes: 4 additions & 4 deletions performances/isatab.py
@@ -1,6 +1,6 @@
"""
File to profile the validation functions of ISAtab.
-Do not comment what look like unused imports. They are being called in the form of a string by runctx.
+Do not comment what looks like unused imports. They are being called in the form of a string by runctx.
Profiles are dumped in /performances/profiles/ and can be visualized using the following command:
`snakeviz ./performances/profiles/` from the project root directory.
Author: D. Batista (@Terazus)
@@ -22,9 +22,9 @@ def profile_validation(filename=None, output_path=None):
if output_path is None:
output_path = OUTPUT_PATH

-    with open(input_data_path, 'r') as data_file:
-        output_data_path = path.join(output_path, 'isatab_validation_mzml')
-        runctx('validate(data_file, mzml=True)', globals(), locals(), output_data_path)
+    # with open(input_data_path, 'r') as data_file:
+    #     output_data_path = path.join(output_path, 'isatab_validation_mzml')
+    #     runctx('validate(data_file, mzml=True)', globals(), locals(), output_data_path)

with open(input_data_path, 'r') as data_file:
output_data_path = path.join(output_path, 'isatab_validation')
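For readers unfamiliar with the profiling pattern this file relies on: cProfile.runctx executes its first argument as a string, resolving names through the supplied globals/locals, which is why imports that look unused are in fact required. A minimal self-contained sketch:

    from cProfile import runctx
    from os import path

    def slow_sum(n):
        return sum(i * i for i in range(n))

    # The statement is executed under the profiler and the stats are dumped
    # to the given file, which snakeviz can then visualize.
    runctx('slow_sum(1_000_000)', globals(), locals(),
           path.join('.', 'slow_sum.prof'))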
Empty file added tests/__init__.py
Empty file.