def _parse_dx_error(exc, **kwargs):
    """Translate a dxpy search error into a more informative stor exception.

    Args:
        exc (Exception): The exception raised by dxpy.
        **kwargs: Unused by this function.

    Returns:
        DuplicateError: if ``exc`` is a ``DXSearchError`` whose message
            contains "found more".
        NotFoundError: if ``exc`` is a ``DXSearchError`` whose message
            contains "found none".
        None: for every other exception; nothing is raised here, the caller
            decides what to do with the result.
    """
    if not isinstance(exc, dx_exceptions.DXSearchError):
        return None

    # ``BaseException.message`` was removed in Python 3, so fall back to the
    # string form of the exception when the attribute is absent or empty.
    msg = getattr(exc, 'message', None) or str(exc)
    lowered = msg.lower()

    if 'found more' in lowered:
        return DuplicateError(msg, exc)
    if 'found none' in lowered:
        return NotFoundError(msg, exc)
    return None
def _get_parts(self):
    """Return the path parts (drive excluded) as a list of strings.

    The first part produced by the parent class may look like
    'proj:file', 'proj:' or 'proj'. It is split on ':' so that the project
    name always sits at index 0 and, when present and non-empty, the piece
    right after the first colon is inserted at index 1.
    """
    parts = super(DXPath, self)._get_parts()
    # Nothing to split when there are no parts or the first part is empty.
    if not parts or not parts[0]:
        return parts

    pieces = parts[0].split(':')
    parts[0] = pieces[0]
    has_resource_piece = len(pieces) > 1 and pieces[1]
    if has_resource_piece:
        parts.insert(1, pieces[1])
    return parts
def isfile(self):
    """Return whether this path points at an existing, non-folder resource.

    Returns:
        bool: True only when the path has a resource, ``_is_folder()`` is
        falsy for it, and ``exists()`` is truthy. False otherwise,
        including when ``NotFoundError`` is raised during the check.
    """
    try:
        # Coerce to bool: the bare ``and`` chain would otherwise return
        # ``None`` for a project-only path, since ``resource`` is None there.
        return bool(self.resource and not self._is_folder() and self.exists())
    except NotFoundError:
        return False
def list(self,
         canonicalize=False,
         starts_with=None,
         limit=None,
         category=None,
         condition=None
         ):
    """List contents using the resource of the path as a prefix.

    Args:
        canonicalize (boolean): whether to return canonicalized paths
        starts_with (str): Allows for an additional search path to
            be appended to the resource of the dx path. Note that this
            resource path is treated as a directory
        limit (int): Limit the amount of results returned
        category (str): Restricting class : One of 'record', 'file', 'gtable',
            'applet', 'workflow'
        condition (function(results) -> bool): The method will only return
            when the results matches the condition.

    Returns:
        List[DXPath]: The paths produced by ``walkfiles`` for the given
        arguments; an empty list when nothing was found.
    """
    # Check the condition argument before listing, so a rejected condition
    # does not cost a remote call first.
    # NOTE(review): assumes validate_condition only inspects ``condition``.
    utils.validate_condition(condition)
    results = [
        path for path in self.walkfiles(
            canonicalize=canonicalize,
            starts_with=starts_with,
            limit=limit,
            category=category
        )
        # walkfiles yields a lone empty list when this path is a file
        # path; drop any such falsy placeholder so the result is just paths.
        if path
    ]
    utils.check_condition(condition, results)
    return results
def listdir(self, only='all', canonicalize=False):
    """List the folders and objects directly inside this project or folder.

    Args:
        only (str): "objects" for only objects, "folders" for only folder,
            "all" for both; forwarded to ``DXProject.list_folder``
        canonicalize (boolean): whether to return canonicalized paths

    Returns:
        List[DXPath]: DXCanonicalPath entries when ``canonicalize`` is true,
        DXVirtualPath entries otherwise. Empty when the path has a resource
        that ``_is_folder()`` does not accept.

    Raises:
        NotFoundError: When dxpy reports ``ResourceNotFound`` for the folder
    """
    project_id = self.canonical_project
    project_name = self.virtual_project

    points_at_folder = self._is_folder()
    if not points_at_folder and self.resource:
        # A non-folder resource has no children to list.
        return []

    query = {
        'only': only,
        'describe': {'fields': {'name': True, 'folder': True}}
    }
    if points_at_folder:
        query['folder'] = self.resource

    try:
        listing = dxpy.DXProject(dxid=project_id).list_folder(**query)
    except dxpy.exceptions.ResourceNotFound:
        raise NotFoundError('The specified folder ({}) was not found'.format(
            self.resource))

    entries = []
    for kind, items in listing.items():
        from_folders = kind == 'folders'
        for item in items:
            if canonicalize:
                # Folder entries are plain path strings; other entries are
                # dicts carrying an 'id'.
                tail = item.lstrip('/') if from_folders else item['id']
                entries.append(
                    DXCanonicalPath('dx://{}:/{}'.format(project_id, tail)))
            elif from_folders:
                entries.append(
                    DXVirtualPath(self.drive + project_name + ':' + item))
            else:
                described = item['describe']
                parent = DXVirtualPath(
                    self.drive + project_name + ':' + described['folder'])
                entries.append(parent / described['name'])
    return entries
def walkfiles(self,
              pattern=None,
              canonicalize=False,
              recurse=True,
              starts_with=None,
              limit=None,
              category=None):
    """Iterate over data objects under this path matching an optional pattern.

    Args:
        pattern (str): glob pattern to match the filenames against.
        canonicalize (boolean): whether to return canonicalized paths
        recurse (boolean): whether to look in subfolders of folder as well
        starts_with (str): Allows for an additional search path to
            be appended to the resource of the dx path. Note that this
            resource path is treated as a directory
        limit (int): Limit the amount of results returned
        category (str): Restricting class : One of 'record', 'file', 'gtable',
            'applet', 'workflow'

    Returns:
        Iter[DXPath]: DXCanonicalPath objects when ``canonicalize`` is true,
        DXVirtualPath objects otherwise. Nothing is yielded when this path
        has a resource that ``_is_folder()`` does not accept.
    """
    proj_id = self.canonical_project
    proj_name = self.virtual_project

    if self.resource and not self._is_folder():
        # A file path has nothing beneath it. Finish without yielding;
        # yielding an empty list here would hand consumers of the iterator
        # a bogus ``[]`` entry in place of a path.
        return

    folder = self.resource or '/'
    if starts_with:
        # starts_with names a sub-directory of the resource, so join with
        # exactly one separator instead of gluing the two strings together
        # (which turned '/a' + 'b' into the unrelated folder '/ab').
        folder = folder.rstrip('/') + '/' + starts_with.lstrip('/')

    kwargs = {
        'project': proj_id,
        'name': pattern,
        'name_mode': 'glob',
        # the query performance is similar w/wo describe field,
        # hence no need to customize query based on canonicalize flag
        'describe': {'fields': {'name': True, 'folder': True}},
        'recurse': recurse,
        'classname': category,
        'limit': limit,
        'folder': folder
    }
    for obj in dxpy.find_data_objects(**kwargs):
        if canonicalize:
            yield DXCanonicalPath('dx://{}:/{}'.format(obj['project'], obj['id']))
        else:
            described = obj['describe']
            parent = DXVirtualPath(self.drive + proj_name + ':' + described['folder'])
            yield parent / described['name']
@cached_property
def canonical_project(self):
    """Return the ID of the unique project matching this path's project.

    When the project part of the path is already a valid project ID it is
    returned unchanged. Otherwise the name is looked up with
    ``dxpy.find_one_project`` at 'VIEW' level.

    Returns:
        str: the project ID

    Raises:
        DuplicateProjectError - if project name is not unique on DX platform
        ProjectNotFoundError - If project name doesn't exist on DNAnexus
    """
    if utils.is_valid_dxid(self.project, 'project'):
        return self.project

    try:
        proj_dict = dxpy.find_one_project(
            name=self.project, level='VIEW', zero_ok=True, more_ok=False)
    except dx_exceptions.DXSearchError as e:
        # Only a search error signals an ambiguous name here (zero results
        # are allowed via zero_ok). Catching the DXError base class would
        # mislabel unrelated dxpy failures as duplicate projects.
        raise DuplicateProjectError('Duplicate projects were found with given name ({})'
                                    .format(self.project), e)

    if proj_dict is None:
        raise ProjectNotFoundError('No projects were found with given name ({})'
                                   .format(self.project))
    return proj_dict['id']
# TODO(akumar)
class DXTestMixin(VCRMixin):
    """A mixin with helpers for testing code that accesses DNAnexus.

    DXTestMixin should be used to create base test classes for anything
    that accesses DNAnexus. It extends ``vcr_unittest.VCRMixin``.
    """
    def setup_dx_auth(self):
        """Placeholder hook; currently does nothing."""
        pass

    def assert_dx_lists_equal(self, r1, r2):
        """Assert that two collections hold equal items, ignoring order.

        Both arguments are sorted before being compared.
        """
        # assertEquals is a deprecated alias of assertEqual.
        self.assertEqual(sorted(r1), sorted(r2))
class TestBasicPathMethods(unittest.TestCase):
    """Checks name/parent/dirname/basename on dx:// paths built via Path."""

    def test_name(self):
        resource_path = Path('dx://project:/path/to/resource')
        self.assertEqual(resource_path.name, 'resource')

    def test_parent(self):
        resource_path = Path('dx://project:/path/to/resource')
        self.assertEqual(resource_path.parent, 'dx://project:/path/to')

    def test_dirname(self):
        resource_path = Path('dx://project:/path/to/resource')
        self.assertEqual(resource_path.dirname(), 'dx://project:/path/to')

    def test_dirname_top_level(self):
        # dirname() of a path without a resource is the path itself
        bare_project = Path('dx://project')
        project_root = Path('dx://project:/')
        self.assertEqual(bare_project.dirname(), 'dx://project')
        self.assertEqual(project_root.dirname(), 'dx://project:/')

    def test_basename(self):
        resource_path = Path('dx://project:/path/to/resource')
        self.assertEqual(resource_path.basename(), 'resource')
class TestPathManipulations(unittest.TestCase):
    """Checks that '+' and '/' on a DXPath keep the DXPath type."""

    def test_add(self):
        dx_p = DXPath('dx://a:')
        dx_p = dx_p + 'b' + Path('c')
        # assertIsInstance reports the actual type on failure, unlike
        # assertTrue(isinstance(...)) which only says "False is not true".
        self.assertIsInstance(dx_p, DXPath)
        self.assertEqual(dx_p, 'dx://a:bc')

    def test_div(self):
        dx_p = DXPath('dx://t:')
        dx_p = dx_p / 'c' / Path('p')
        self.assertIsInstance(dx_p, DXPath)
        self.assertEqual(dx_p, 'dx://t:/c/p')
def test_write_multiple_w_context_manager(self):
    """Two writes inside one ``open`` context end up in the same object.

    The method takes no mock argument: nothing is patched for this test,
    so the test runner calls it with ``self`` only, and the final
    assertion reads the content back through ``read_object``.
    """
    dx_p = DXPath('dx://{}/{}'.format(self.project, self.file_handler.name))
    with dx_p.open(mode='wb') as obj:
        obj.write(b'hello')
        obj.write(b' world')
    self.assertIn(b'hello world', dx_p.read_object())
class TestCanonicalProject(DXTestCase):
    """Checks resolution of a project name to its canonical project ID."""

    def test_no_project(self):
        dx_p = DXPath('dx://Random_Project:/')
        with pytest.raises(dx.ProjectNotFoundError, match='No projects'):
            dx_p.canonical_project

    def test_unique_project(self):
        self.setup_temporary_project()
        dx_p = DXPath('dx://'+self.project)
        self.assertTrue(utils.is_valid_dxid(dx_p.canonical_project, 'project'))

    def test_duplicate_projects(self):
        self.setup_temporary_project()
        test_proj = dxb.DXProject()
        test_proj.new(self.new_proj_name())
        # Register the destroy as a cleanup so the second project is removed
        # even when the assertion below fails. Cleanups run last-in
        # first-out, so this still runs before the temporary project's
        # own teardown, as the inline destroy() did.
        self.addCleanup(test_proj.destroy)
        dx_p = DXPath('dx://' + self.project)
        with pytest.raises(dx.DuplicateProjectError, match='Duplicate projects'):
            dx_p.canonical_project
def test_listdir_empty_folder(self):
    """listdir on a freshly created folder returns an empty list."""
    self.setup_temporary_project()
    self.project_handler.new_folder('/temp_folder')
    empty_folder = DXPath('dx://' + self.project + ':/temp_folder')
    self.assertEqual(empty_folder.listdir(), [])
def test_listdir_on_canonical_resource(self):
    """listdir on the canonical path of a file returns an empty list."""
    self.setup_temporary_project()
    self.setup_files(['/temp_file.txt'])
    virtual_file = DXPath('dx://' + self.project + ':/temp_file.txt')
    canonical_file = virtual_file.canonical_path
    self.assertEqual(canonical_file.listdir(), [])
def test_list_absent_folder(self):
    """list on a folder that was never created returns an empty list."""
    self.setup_temporary_project()
    missing_folder = DXPath('dx://' + self.project + ':/random_folder')
    self.assertEqual(missing_folder.list(), [])
def test_list_fail_w_condition(self):
    """list raises ConditionNotMetError when the condition is not met.

    Two files are created, so a condition requiring exactly one result
    cannot be satisfied.
    """
    self.setup_temporary_project()
    self.setup_files(['/temp_folder/folder_file.txt',
                      '/temp_file.txt'])
    dx_p = DXPath('dx://' + self.project)
    with pytest.raises(exceptions.ConditionNotMetError, match='not met'):
        # The call is expected to raise, so its result is not bound.
        dx_p.list(condition=lambda res: len(res) == 1)
':/temp_folder/folder_file.txt', + 'dx://' + self.project + ':/temp_file.txt' + ]) + + def test_list_iter_canon_on_canon(self): + self.setup_temporary_project() + self.setup_files(['/temp_folder/folder_file.txt', + '/temp_folder/random/temp_file.txt']) + dx_p = DXPath('dx://' + self.proj_id + ':/temp_folder') + results = list(dx_p.list_iter(canonicalize=True)) + self.assertTrue(all(self.proj_id in result for result in results)) + self.assertTrue(all('temp_folder' not in result for result in results)) + self.assertEqual(len(results), 2) + + +class TestWalkFiles(DXTestCase): + def test_pattern_w_prefix(self): + self.setup_temporary_project() + self.setup_files(['/temp_folder/folder_file.txt', + '/temp_file.txt']) + dx_p = DXPath('dx://' + self.project) + results = list(dx_p.walkfiles(pattern='fold*')) + self.assert_dx_lists_equal(results, [ + 'dx://'+self.project+':/temp_folder/folder_file.txt' + ]) + + def test_pattern_w_suffix(self): + self.setup_temporary_project() + self.setup_files(['/temp_folder/folder_file.csv', + '/temp_file.txt']) + dx_p = DXPath('dx://'+self.project) + results = list(dx_p.walkfiles(pattern='*.txt')) + self.assert_dx_lists_equal(results, [ + 'dx://' + self.project + ':/temp_file.txt' + ]) + + def test_pattern_w_prefix_suffix(self): + self.setup_temporary_project() + self.setup_files(['/temp_folder/folder_file.csv', + '/temp_file.txt']) + dx_p = DXPath('dx://'+self.project) + results = list(dx_p.walkfiles(pattern='*file*')) + self.assert_dx_lists_equal(results, [ + 'dx://' + self.project + ':/temp_file.txt', + 'dx://' + self.project + ':/temp_folder/folder_file.csv' + ]) + + def test_pattern_share_folder_match(self): + self.setup_temporary_project() + self.setup_files(['/temp_folder/folder_file.csv', + '/temp_folder.txt']) + dx_p = DXPath('dx://'+self.project) + results = list(dx_p.walkfiles(pattern='temp_folder*')) + self.assert_dx_lists_equal(results, [ + 'dx://' + self.project + ':/temp_folder.txt' + ]) + + def test_pattern_no_match(self): 
+ self.setup_temporary_project() + self.setup_files(['/temp_folder/folder_file.csv', + '/random_file.txt']) + dx_p = DXPath('dx://'+self.project) + results = list(dx_p.walkfiles(pattern='*temp*')) + self.assertEqual(results, []) + + +class TestStat(DXTestCase): + def test_stat_folder(self): + self.setup_temporary_project() + self.setup_files(['/temp_folder/folder_file.csv']) + with pytest.raises(ValueError, match='ending in folders'): + DXPath('dx://'+self.project+':/temp_folder/').stat() + with pytest.raises(ValueError, match='ending in folders'): + DXPath('dx://'+self.project+':/temp_folder').stat() + + def test_stat_project_error(self): + self.setup_temporary_project() # creates project with name in self.project + test_proj = dxb.DXProject() + test_proj.new(self.new_proj_name()) # creates duplicate project + + with pytest.raises(dx.DuplicateProjectError, match='Duplicate projects'): + DXPath('dx://'+self.project+':').stat() + with pytest.raises(dx.DuplicateProjectError, match='Duplicate projects'): + DXPath('dx://'+self.project+':/').stat() + with pytest.raises(dx.NotFoundError, match='No projects'): + DXPath('dx://Random_Proj:').stat() + + test_proj.destroy() + + def test_stat_virtual_project(self): + self.setup_temporary_project() + dx_p = DXPath('dx://'+self.project) + response = dx_p.stat() + self.assertIn('region', response) # only projects have regions + dx_p = DXPath('dx://'+self.project+':') + response = dx_p.stat() + self.assertIn('region', response) + + def test_stat_canonical_project(self): + self.setup_temporary_project() + dx_p = DXPath('dx://'+self.proj_id+':') + response = dx_p.stat() + self.assertIn('region', response) + + def test_stat_file(self): + self.setup_temporary_project() + self.setup_files(['/temp_folder/folder_file.txt', + '/temp_file.txt']) + dx_p = DXPath('dx://'+self.project+':/temp_file.txt') + response = dx_p.stat() + self.assertIn('folder', response) # only files have folders + dx_p = 
DXPath('dx://'+self.project+':/temp_folder/folder_file.txt') + response = dx_p.stat() + self.assertIn('folder', response) + + def test_stat_canonical_resource(self): + self.setup_temporary_project() + self.setup_files(['/temp_file.txt']) + dx_p = DXPath('dx://'+self.project+':/temp_file.txt').canonical_path + response = dx_p.stat() + self.assertIn('folder', response) # only files have folders + + +class TestExists(DXTestCase): + def test_false(self): + self.setup_temporary_project() + dx_p = DXPath('dx://'+self.project+':/random.txt') + result = dx_p.exists() + self.assertFalse(result) + + def test_false_no_folder(self): + self.setup_temporary_project() + self.setup_files(['/temp_folder/folder_file.txt']) + dx_p = DXPath('dx://' + self.project + ':/random_folder/folder_file.txt') + result = dx_p.exists() + self.assertFalse(result) + + def test_raises_on_duplicate(self): + self.setup_temporary_project() + self.setup_files(['/temp_folder/folder_file.txt', + '/temp_folder/folder_file.txt']) + dx_p = DXPath('dx://' + self.project + ':/temp_folder/folder_file.txt') + with pytest.raises(dx.DuplicateError, match='not unique'): + dx_p.exists() + + def test_true_file(self): + self.setup_temporary_project() + self.setup_files(['/temp_folder/folder_file.txt']) + dx_p = DXPath('dx://' + self.project + ':/temp_folder/folder_file.txt') + result = dx_p.exists() + self.assertTrue(result) + + def test_true_empty_dir(self): + self.setup_temporary_project() + self.project_handler.new_folder('/temp_folder') + dx_p = DXPath('dx://' + self.project + ':/temp_folder') + result = dx_p.exists() + self.assertTrue(result) + + def test_true_dir_with_object(self): + self.setup_temporary_project() + self.setup_files(['/temp_folder/folder_file.txt']) + dx_p = DXPath('dx://' + self.project + ':/temp_folder') + result = dx_p.exists() + self.assertTrue(result) + + def test_project_does_not_exist(self): + dx_p = DXPath('dx://random_project:/') + result = dx_p.exists() + self.assertFalse(result) + + 
class TestGlob(DXTestCase):
    """Tests for ``DXPath.glob``."""

    def test_suffix_pattern(self):
        """``*.txt`` returns only the ``.txt`` file set up in the folder."""
        self.setup_temporary_project()
        fixtures = ['/temp_folder/folder_file.txt',
                    '/temp_folder/temp_file.csv']
        self.setup_files(fixtures)
        folder = DXPath('dx://' + self.project + ':/temp_folder')
        matches = folder.glob('*.txt')
        expected = ['dx://' + self.project + ':/temp_folder/folder_file.txt']
        self.assert_dx_lists_equal(matches, expected)

    def test_prefix_pattern(self):
        """``file*`` returns ``file.csv`` but not ``folder_file.txt``."""
        self.setup_temporary_project()
        fixtures = ['/temp_folder/folder_file.txt',
                    '/temp_folder/file.csv']
        self.setup_files(fixtures)
        folder = DXPath('dx://' + self.project + ':/temp_folder')
        matches = folder.glob('file*')
        expected = ['dx://' + self.project + ':/temp_folder/file.csv']
        self.assert_dx_lists_equal(matches, expected)

    def test_valid_pattern(self):
        """``*fi*le*`` returns both files set up in the folder."""
        self.setup_temporary_project()
        fixtures = ['/temp_folder/folder_file.txt',
                    '/temp_folder/temp_file.csv']
        self.setup_files(fixtures)
        folder = DXPath('dx://' + self.project + ':/temp_folder')
        matches = folder.glob('*fi*le*')
        expected = [
            'dx://' + self.project + ':/temp_folder/folder_file.txt',
            'dx://' + self.project + ':/temp_folder/temp_file.csv',
        ]
        self.assert_dx_lists_equal(matches, expected)

    def test_valid_pattern_wo_wildcard(self):
        """The wildcard-free pattern ``file`` returns an empty list here."""
        self.setup_temporary_project()
        fixtures = ['/temp_folder/folder_file.txt',
                    '/temp_folder/temp_file.csv']
        self.setup_files(fixtures)
        folder = DXPath('dx://' + self.project + ':/temp_folder')
        matches = folder.glob('file')
        self.assertEqual(matches, [])

    def test_pattern_no_file_match(self):
        """``*temp*`` globbed on the project path returns an empty list."""
        self.setup_temporary_project()
        fixtures = ['/temp_folder/folder_file.csv',
                    '/random_file.txt']
        self.setup_files(fixtures)
        project_root = DXPath('dx://'+self.project)
        matches = project_root.glob('*temp*')
        self.assertEqual(matches, [])

    def test_glob_cond_met(self):
        """A ``condition`` that holds for the result lets glob return it."""
        def exactly_one(found):
            return len(found) == 1

        self.setup_temporary_project()
        self.setup_files(['/temp_folder/folder_file.txt'])
        folder = DXPath('dx://' + self.project + ':/temp_folder')
        matches = folder.glob('*fi*le*', condition=exactly_one)
        expected = ['dx://' + self.project + ':/temp_folder/folder_file.txt']
        self.assert_dx_lists_equal(matches, expected)
+ + def test_cond_no_met(self): + self.setup_temporary_project() + self.setup_files(['/temp_folder/folder_file.txt', + '/temp_folder/temp_file.csv']) + dx_p = DXPath('dx://' + self.project + ':/temp_folder') + with pytest.raises(exceptions.ConditionNotMetError, match='not met'): + dx_p.glob('*fi*le*', condition=lambda res: len(res) == 1) + + +class TestTempUrl(DXTestCase): + def test_fail_on_project(self): + self.setup_temporary_project() + dx_p = DXPath('dx://' + self.project) + with pytest.raises(dx.DXError, match='DX Projects'): + dx_p.temp_url() + + def test_fail_on_folder(self): + self.setup_temporary_project() + self.setup_files(['/temp_folder/folder_file.txt']) + dx_p = DXPath('dx://' + self.project + ':/temp_folder') + with pytest.raises(dx.DXError, match='DXPaths ending in folders'): + dx_p.temp_url() + + def test_on_file(self): + self.setup_temporary_project() + with dxpy.new_dxfile(name='temp_file.txt', + project=self.proj_id) as f: + f.write('data') + while f._get_state().lower() != 'closed': + time.sleep(1) # to allow for the file state to go to closed after calling close() + dx_p = DXPath('dx://' + self.project + ':/temp_file.txt') + result = dx_p.temp_url() + self.assertIn('dl.dnanex.us', result) + + def test_on_file_canonical(self): + self.setup_temporary_project() + with dxpy.new_dxfile(name='temp_file.txt', + project=self.proj_id) as f: + f.write('data') + while f._get_state().lower() != 'closed': + time.sleep(1) # to allow for the file state to go to closed after calling close() + dx_p = DXPath('dx://' + self.project + ':/temp_file.txt').canonical_path + result = dx_p.temp_url() + self.assertIn('dl.dnanex.us', result) + + def test_on_file_named_timed(self): # TODO + self.setup_temporary_project() + with dxpy.new_dxfile(name='temp_file.txt', + project=self.proj_id) as f: + f.write('data') + while f._get_state().lower() != 'closed': + time.sleep(1) # to allow for the file state to go to closed after calling close() + dx_p = DXPath('dx://' + 
self.project + ':/temp_file.txt') + result = dx_p.temp_url(filename='random.txt', lifetime=1) + self.assertIn('dl.dnanex.us', result) + self.assertIn('random.txt', result) + url = urllib.request.urlopen(result) + self.assertIn('attachment', url.headers['content-disposition']) + self.assertIn('random.txt', url.headers['content-disposition']) + time.sleep(2) # for link to expire + with pytest.raises(urllib.error.HTTPError): + urllib.request.urlopen(result) diff --git a/stor/tests/test_dx_path_compat.py b/stor/tests/test_dx_path_compat.py new file mode 100644 index 00000000..e4671c5d --- /dev/null +++ b/stor/tests/test_dx_path_compat.py @@ -0,0 +1,145 @@ +import pytest +import unittest + +from stor.dx import DXPath, DXCanonicalPath, DXVirtualPath + + +class TestBasics(unittest.TestCase): + def test_relpath(self): + with self.assertRaises(AttributeError): + DXPath('dx://project:').relpathto() + with self.assertRaises(AttributeError): + DXPath('dx://project:').relpath() + + def test_construction_from_none(self): + with self.assertRaises(TypeError): + DXPath(None) + + def test_construction_from_no_project(self): + with pytest.raises(ValueError, match='Project is required to construct a DXPath'): + DXPath('dx://') + + def test_canonical_construct_fail(self): + with pytest.raises(ValueError, match='ambiguous'): + DXPath('dx://project-123456789012345678901234:/file-123456789012345678901234/a/') + + def test_canonical_construct_wo_file(self): + for path_str in [ + 'dx://project-123456789012345678901234:/', + 'dx://project-123456789012345678901234:', + 'dx://project-123456789012345678901234' + ]: + p = DXPath(path_str) + self.assertIsInstance(p, DXCanonicalPath, 'Expected canonical DXPath for (%s)' % p) + self.assertEqual(p.project, 'project-123456789012345678901234', + 'Project parsing unsuccessful for %s' % p) + + def test_canonical_construct_w_file(self): + for path_str in [ + 'dx://project-123456789012345678901234:file-123456789012345678901234', + 
'dx://project-123456789012345678901234:/file-123456789012345678901234' + ]: + p = DXPath(path_str) + self.assertIsInstance(p, DXCanonicalPath, 'Expected canonical DXPath for (%s)' % p) + self.assertEqual(p.project, 'project-123456789012345678901234', + 'Project parsing unsuccessful for %s' % p) + self.assertEqual(p.resource, '/file-123456789012345678901234', + 'Resource parsing error for %s' % p) + + def test_virtual_construct_wo_resource(self): + for path_str in [ + 'dx://proj123:/', + 'dx://proj123:', + 'dx://proj123' + ]: + p = DXPath(path_str) + self.assertIsInstance(p, DXVirtualPath, 'Expected virtual DXPath for (%s)' % p) + self.assertEqual(p.project, 'proj123', + 'Project parsing unsuccessful for %s' % p) + + def test_virtual_construct_wo_folder(self): + for path_str in [ + 'dx://proj123:/a.ext', + 'dx://proj123:a.ext' + ]: + p = DXPath(path_str) + self.assertIsInstance(p, DXVirtualPath, 'Expected virtual DXPath for (%s)' % p) + self.assertEqual(p.project, 'proj123', + 'Project parsing unsuccessful for %s' % p) + self.assertEqual(str(p.resource), '/a.ext', + 'Resource parsing error for %s' % p) + + for path_str in [ + 'dx://project-123456789012345678901234:/a.ext', + 'dx://project-123456789012345678901234:a.ext' + ]: + p = DXPath(path_str) + self.assertIsInstance(p, DXVirtualPath, 'Expected virtual DXPath for (%s)' % p) + self.assertEqual(p.project, 'project-123456789012345678901234', + 'Project parsing unsuccessful for %s' % p) + self.assertEqual(p.resource, '/a.ext', + 'Resource parsing error for %s' % p) + + def test_virtual_construct_w_folder(self): + for path_str in [ + 'dx://proj123:/b/c/a.ext', + 'dx://proj123:b/c/a.ext' + ]: + p = DXPath(path_str) + self.assertIsInstance(p, DXVirtualPath, 'Expected virtual DXPath for (%s)' % p) + self.assertEqual(p.project, 'proj123', + 'Project parsing unsuccessful for %s' % p) + self.assertEqual(p.resource, '/b/c/a.ext', + 'Resource parsing error for %s' % p) + + for path_str in [ + 
'dx://project-123456789012345678901234:/b/c/a.ext', + 'dx://project-123456789012345678901234:b/c/a.ext' + ]: + p = DXPath(path_str) + self.assertIsInstance(p, DXVirtualPath, 'Expected virtual DXPath for (%s)' % p) + self.assertEqual(p.project, 'project-123456789012345678901234', + 'Project parsing unsuccessful for %s' % p) + self.assertEqual(p.resource, '/b/c/a.ext', + 'Resource parsing error for %s' % p) + + def test_string_compatibility(self): + """ Test compatibility with ordinary strings. """ + x = DXPath('dx://xyzzy:') + assert x == 'dx://xyzzy:' + assert x == str('dx://xyzzy:') + assert 'xyz' in x + assert 'analysis' not in x + + # sorting + items = [DXPath('dx://fhj:'), + DXPath('dx://fgh:'), + 'dx://E:', + DXPath('dx://d:'), + 'dx://A:', + DXPath('dx://B:'), + 'dx://c:'] + items.sort() + self.assertEqual(items, + ['dx://A:', 'dx://B:', 'dx://E:', 'dx://c:', + 'dx://d:', 'dx://fgh:', 'dx://fhj:']) + + # Test p1/p2. + p1 = DXPath("dx://foo:") + p2 = "bar" + self.assertEqual(p1 / p2, DXPath("dx://foo:/bar")) + + def test_properties(self): + # Create sample DXPath object. + f = DXPath('dx://project:/prefix/whatever.csv') + + self.assertEqual(f.parent, DXPath('dx://project:/prefix')) + + # .name + self.assertEqual(f.name, 'whatever.csv') + self.assertEqual(f.parent.name, 'prefix') + self.assertEqual(f.parent.parent.name, 'project:') + + # .ext + self.assertEqual(f.ext, '.csv') + self.assertEqual(f.parent.ext, '') diff --git a/stor/utils.py b/stor/utils.py index a475e119..abcf44c8 100644 --- a/stor/utils.py +++ b/stor/utils.py @@ -8,6 +8,9 @@ from subprocess import check_call import tempfile +from dxpy.bindings import verify_string_dxid +from dxpy.exceptions import DXError + from stor import exceptions logger = logging.getLogger(__name__) @@ -230,6 +233,64 @@ def is_obs_path(p): return is_s3_path(p) or is_swift_path(p) +def is_dx_path(p): + """Determines if the path is a DX path. 
+ + All DX paths start with ``dx://`` + + Args: + p (str): The path string + + Returns: + bool: True if p is a DX path, False otherwise. + """ + from stor.dx import DXPath + return p.startswith(DXPath.drive) + + +def is_valid_dxid(dxid, expected_classes): + """Wrapper function for verify_string_dxid that returns a bool, because + verify_string_dxid returns None if success, raises error if failed + + Args: Accepts same args as verify_string_dxid (dxid, expected_classes) + + Returns: + bool: True if dxid verifies as one of expected_classes, False on DXError + """ + try: + return verify_string_dxid(dxid, expected_classes) is None + except DXError: + return False + + +def find_dx_class(p): + """Finds the class of the DX path: DXVirtualPath or DXCanonicalPath + + Args: + p (str): The path string; ValueError is raised if it has no project + + Returns: + cls: DXCanonicalPath if project and resource are both canonical, else DXVirtualPath + """ + from stor.dx import DXPath, DXCanonicalPath, DXVirtualPath + colon_pieces = p[len(DXPath.drive):].split(':', 1) + if not colon_pieces or not colon_pieces[0] or '/' in colon_pieces[0]: + raise ValueError('Project is required to construct a DXPath') + project = colon_pieces[0] + resource = (colon_pieces[1] if len(colon_pieces) == 2 else '').lstrip('/') + resource_parts = resource.split('/') + root_name, rest = resource_parts[0], resource_parts[1:] + canonical_resource = is_valid_dxid(root_name, 'file') or not resource + if canonical_resource and rest: + raise ValueError('DX folder paths that start with a valid file dxid are ambiguous') + canonical_project = is_valid_dxid(project, 'project') + + if canonical_project and canonical_resource: + return DXCanonicalPath + else: + return DXVirtualPath + + def is_writeable(path, swift_retry_options=None): """ Determine whether we have permission to write to path. 
diff --git a/tox.ini b/tox.ini index 7191dc5e..d4ae307b 100644 --- a/tox.ini +++ b/tox.ini @@ -19,3 +19,6 @@ whitelist_externals = make passenv = SWIFT_TEST_USERNAME SWIFT_TEST_PASSWORD OS_TEMP_URL_KEY AWS_TEST_ACCESS_KEY_ID AWS_DEFAULT_REGION AWS_TEST_SECRET_ACCESS_KEY OS_AUTH_URL + LC_LANG LC_ALL PYTHONIOENCODING + PYTEST_ADDOPTS + DXPY_LOGIN_TOKEN \ No newline at end of file