diff --git a/eodag/api/core.py b/eodag/api/core.py index bacbba51c7..2ef6b4614c 100644 --- a/eodag/api/core.py +++ b/eodag/api/core.py @@ -1853,6 +1853,9 @@ def _do_search( else: eo_product.collection = guesses[0].id + # remove property "eodag:download_link" which may have been useful to create its matching asset + eo_product.properties.pop("eodag:download_link", None) + if eo_product.search_intersection is not None: eo_product._register_downloader_from_manager(self._plugins_manager) diff --git a/eodag/api/product/_assets.py b/eodag/api/product/_assets.py index b07b58a869..fb9ffbe5a5 100644 --- a/eodag/api/product/_assets.py +++ b/eodag/api/product/_assets.py @@ -17,10 +17,14 @@ # limitations under the License. from __future__ import annotations +import logging import re from collections import UserDict -from typing import TYPE_CHECKING, Any, Optional +from typing import TYPE_CHECKING, Any, Optional, Union +from eodag.plugins.apis.base import Api +from eodag.plugins.authentication.base import Authentication +from eodag.plugins.download.base import Download from eodag.utils.exceptions import NotAvailableError from eodag.utils.repr import dict_to_html_table @@ -30,6 +34,9 @@ from eodag.utils import Unpack +logger = logging.getLogger("eodag.assets") + + class AssetsDict(UserDict): """A UserDict object which values are :class:`~eodag.api.product._assets.Asset` contained in a :class:`~eodag.api.product._product.EOProduct` resulting from a @@ -164,15 +171,26 @@ class Asset(UserDict): {'href': 'http://somewhere/something'} """ + #: EOProduct the asset belongs to product: EOProduct + #: size of the asset size: int + #: name of the asset file filename: Optional[str] + #: relative path of the asset rel_path: str + #: The path to the asset, either remote or local if downloaded + location: str + #: The remote path to the asset + remote_location: str def __init__(self, product: EOProduct, key: str, *args: Any, **kwargs: Any) -> None: self.product = product self.key = key + 
self.downloader: Optional[Union[Api, Download]] = None + self.downloader_auth: Optional[Authentication] = None super(Asset, self).__init__(*args, **kwargs) + self.location = self.remote_location = self.data.get("href", "") def as_dict(self) -> dict[str, Any]: """Builds a representation of Asset to enable its serialization @@ -201,3 +219,33 @@ def _repr_html_(self): """ + + def register_downloader( + self, downloader: Union[Api, Download], authenticator: Optional[Authentication] + ) -> None: + """Give to the asset the information needed to download itself. + + :param downloader: The download method that it can use + :class:`~eodag.plugins.download.base.Download` or + :class:`~eodag.plugins.apis.base.Api` + :param authenticator: The authentication method needed to perform the download + :class:`~eodag.plugins.authentication.base.Authentication` + """ + self.downloader = downloader + self.downloader_auth = authenticator + + # resolve "%(...)" placeholders found in locations using the downloader configuration + location_attrs = ("location", "remote_location") + for location_attr in location_attrs: + if "%(" in getattr(self, location_attr): + try: + setattr( + self, + location_attr, + getattr(self, location_attr) % vars(self.downloader.config), + ) + except ValueError as e: + logger.debug( + f"Could not resolve asset.{location_attr} ({getattr(self, location_attr)})" + f" in register_downloader: {str(e)}" + ) diff --git a/eodag/api/product/_product.py b/eodag/api/product/_product.py index 87fb4da68f..537ef8cf05 100644 --- a/eodag/api/product/_product.py +++ b/eodag/api/product/_product.py @@ -141,7 +141,7 @@ def __init__( or properties.get("_collection") ) self.location = self.remote_location = properties.get("eodag:download_link", "") - self.assets = AssetsDict(self) + self.assets = AssetsDict(self, properties.pop("assets")) self.properties = { key: value for key, value in properties.items() @@ -336,14 +336,24 @@ def _register_downloader_from_manager(self, plugins_manager: 
PluginManager) -> N the download and authentication plugins. """ download_plugin = plugins_manager.get_download_plugin(self) - if len(self.assets) > 0: - matching_url = next(iter(self.assets.values()))["href"] - elif self.properties.get("order:status") != ONLINE_STATUS: - matching_url = self.properties.get( - "eodag:order_link" - ) or self.properties.get("eodag:download_link") + + assets_values = self.assets.values() + is_there_download_link = any( + assets_val.key == "eodag:download_link" for assets_val in assets_values + ) + + # check url of property "order:status" and asset "eodag:download_link" first + # since other assets can have paths not matching plugin matching_url pattern + if self.properties.get("order:status") != ONLINE_STATUS and ( + (order_link := self.properties.get("eodag:order_link")) is not None + ): + matching_url = order_link + elif not assets_values: + matching_url = None + elif is_there_download_link: + matching_url = self.assets["eodag:download_link"]["href"] else: - matching_url = self.properties.get("eodag:download_link") + matching_url = next(iter(assets_values))["href"] try: auth_plugin = next( diff --git a/eodag/api/product/metadata_mapping.py b/eodag/api/product/metadata_mapping.py old mode 100644 new mode 100755 index 780bf2f167..99a15ced77 --- a/eodag/api/product/metadata_mapping.py +++ b/eodag/api/product/metadata_mapping.py @@ -165,6 +165,7 @@ def format_metadata(search_param: str, *args: Any, **kwargs: Any) -> str: - ``literalize_unicode``: convert a string to its raw Unicode literal form - ``not_available``: replace value with "Not Available" - ``recursive_sub_str``: recursively substitue in the structure (e.g. 
dict) values matching a regex + - ``dict_update``: update a dictionary with a list converted to a dictionary - ``remove_extension``: on a string that contains dots, only take the first part of the list obtained by splitting the string on dots - ``replace_str``: execute "string".replace(old, new) @@ -189,6 +190,9 @@ def format_metadata(search_param: str, *args: Any, **kwargs: Any) -> str: - ``to_rounded_wkt``: simplify the WKT of a geometry - ``to_title``: Convert a string to title case - ``to_upper``: Convert a string to uppercase + - ``assets_list_to_dict``: convert a list of assets into a dictionary + - ``assets_list_to_dict_and_update``: convert a list of assets into a dictionary and update it with + another dictionary :param search_param: The string to be formatted :param args: (optional) Additional arguments to use in the formatting process @@ -695,8 +699,12 @@ def convert_recursive_sub_str( def convert_dict_update( input_dict: dict[Any, Any], args: str ) -> dict[Any, Any]: - """Converts""" - new_items_list = ast.literal_eval(args) + """Updates a dictionary with a list converted to a dictionary""" + # if the value was not found, consider it as an empty dictionary + if input_dict == NOT_AVAILABLE: + input_dict = {} + + new_items_list = ast.literal_eval(args.strip()) new_items_dict = nested_pairs2dict(new_items_list) @@ -706,7 +714,7 @@ def convert_dict_update( def convert_dict_filter( input_dict: dict[Any, Any], jsonpath_filter_str: str ) -> dict[Any, Any]: - """Fitlers dict items using jsonpath""" + """Filters dict items using jsonpath""" jsonpath_filter = string_to_jsonpath(jsonpath_filter_str, force=True) if isinstance(jsonpath_filter, str) or not isinstance(input_dict, dict): @@ -1022,52 +1030,105 @@ def convert_assets_list_to_dict( {"href": "foo", "title": "asset1", "name": "foo-name"}, {"href": "bar", "title": "path/to/asset1", "name": "bar-name"}, {"href": "baz", "title": "path/to/asset2", "name": "baz-name"}, - {"href": "qux", "title": "asset3", "name": 
"qux-name"}, + {"href": "qux", "title": "asset3", "name": "qux-name"} ] and asset_name_key == "title" => { "asset1": {"href": "foo", "title": "asset1", "name": "foo-name"}, "path/to/asset1": {"href": "bar", "title": "path/to/asset1", "name": "bar-name"}, "asset2": {"href": "baz", "title": "path/to/asset2", "name": "baz-name"}, - "asset3": {"href": "qux", "title": "asset3", "name": "qux-name"}, + "asset3": {"href": "qux", "title": "asset3", "name": "qux-name"} } assets_list == [ {"href": "foo", "title": "foo-title", "name": "asset1"}, {"href": "bar", "title": "bar-title", "name": "path/to/asset1"}, {"href": "baz", "title": "baz-title", "name": "path/to/asset2"}, - {"href": "qux", "title": "qux-title", "name": "asset3"}, + {"href": "qux", "title": "qux-title", "name": "asset3"} ] and asset_name_key == "name" => { "asset1": {"href": "foo", "title": "foo-title", "name": "asset1"}, "path/to/asset1": {"href": "bar", "title": "bar-title", "name": "path/to/asset1"}, "asset2": {"href": "baz", "title": "baz-title", "name": "path/to/asset2"}, - "asset3": {"href": "qux", "title": "qux-title", "name": "asset3"}, + "asset3": {"href": "qux", "title": "qux-title", "name": "asset3"} } """ asset_names: list[str] = [] assets_dict: dict[str, dict[str, str]] = {} + # create dictionary with assets full name for asset in assets_list: asset_name = asset[asset_name_key] asset_names.append(asset_name) assets_dict[asset_name] = asset - # we only keep the equivalent of the path basename in the case where the - # asset name has a path pattern and this basename is only found once + # when an asset name has a path pattern, we update its value with its basename if + # this basename is found for the first time. 
Otherwise, we keep it as a full path immutable_asset_indexes: list[int] = [] for i, asset_name in enumerate(asset_names): if i in immutable_asset_indexes: continue - change_asset_name = True + update_asset_name = True asset_basename = asset_name.split("/")[-1] j = i + 1 - while change_asset_name and j < len(asset_names): + while update_asset_name and j < len(asset_names): asset_tmp_basename = asset_names[j].split("/")[-1] if asset_basename == asset_tmp_basename: - change_asset_name = False + update_asset_name = False immutable_asset_indexes.extend([i, j]) j += 1 - if change_asset_name: + if update_asset_name: assets_dict[asset_basename] = assets_dict.pop(asset_name) return assets_dict + @staticmethod + def convert_assets_list_to_dict_and_update( + assets_list: list[dict[str, str]], args: str, asset_name_key: str = "title" + ) -> dict[str, dict[str, str]]: + """Convert a list of assets to a dictionary then update it with ``args``, chaining two MetadataFormatter static methods in the following order: + - convert_assets_list_to_dict() + - convert_dict_update() + + assets_list == [ + {"href": "foo", "title": "asset1", "name": "foo-name"}, + {"href": "bar", "title": "path/to/asset1", "name": "bar-name"}, + {"href": "baz", "title": "path/to/asset2", "name": "baz-name"}, + {"href": "qux", "title": "asset3", "name": "qux-name"}, + ], asset_name_key == "title" and args == '[["eodag:download_link",[ + ["title","Full product download"], + ["href","https://downloadlink.foo"], + ["roles",["data"]], + ["type","application/zip"] + ]]]' => { + "asset1": {"href": "foo", "title": "asset1", "name": "foo-name"}, + "path/to/asset1": {"href": "bar", "title": "path/to/asset1", "name": "bar-name"}, + "asset2": {"href": "baz", "title": "path/to/asset2", "name": "baz-name"}, + "asset3": {"href": "qux", "title": "asset3", "name": "qux-name"}, + "eodag:download_link": {"title": "Full product download", "href": "https://downloadlink.foo", + "roles": ["data"], "type": "application/zip" + } + } + assets_list == [ + {"href": "foo", "title": "foo-title", "name": 
"asset1"}, + {"href": "bar", "title": "bar-title", "name": "path/to/asset1"}, + {"href": "baz", "title": "baz-title", "name": "path/to/asset2"}, + {"href": "qux", "title": "qux-title", "name": "asset3"}, + ], asset_name_key == "name" and args == '[["eodag:download_link",[ + ["title","Full product download"], + ["href","https://downloadlink.foo"], + ["roles",["data"]], + ["type","application/zip"] + ]]]' => { + "asset1": {"href": "foo", "title": "foo-title", "name": "asset1"}, + "path/to/asset1": {"href": "bar", "title": "bar-title", "name": "path/to/asset1"}, + "asset2": {"href": "baz", "title": "baz-title", "name": "path/to/asset2"}, + "asset3": {"href": "qux", "title": "qux-title", "name": "asset3"}, + "eodag:download_link": {"title": "Full product download", "href": "https://downloadlink.foo", + "roles": ["data"], "type": "application/zip" + } + } + """ + assets_dict = MetadataFormatter.convert_assets_list_to_dict( + assets_list, asset_name_key + ) + return MetadataFormatter.convert_dict_update(assets_dict, args) + @staticmethod def convert_wekeo_to_cop_collection(val: str, prefix: str) -> str: """Converts the name of a collection from the WEkEO format to the Copernicus format.""" diff --git a/eodag/config.py b/eodag/config.py index 6ad06b4583..da71681a59 100644 --- a/eodag/config.py +++ b/eodag/config.py @@ -430,9 +430,6 @@ class MetadataPreMapping(TypedDict, total=False): flatten_top_dirs: bool #: :class:`~eodag.plugins.download.base.Download` Level in extracted path tree where to find data archive_depth: int - #: :class:`~eodag.plugins.download.base.Download` Whether ignore assets and download using ``eodag:download_link`` - #: or not - ignore_assets: bool #: :class:`~eodag.plugins.download.base.Download` Collection specific configuration products: dict[str, dict[str, Any]] #: :class:`~eodag.plugins.download.base.Download` Number of maximum workers allowed for parallel downloads diff --git a/eodag/plugins/apis/ecmwf.py b/eodag/plugins/apis/ecmwf.py index 
26ca4ab570..a1590569be 100644 --- a/eodag/plugins/apis/ecmwf.py +++ b/eodag/plugins/apis/ecmwf.py @@ -199,17 +199,19 @@ def download( ) # Prepare download - fs_path, record_filename = self._prepare_download( + fs_path, record_filenames = self._prepare_download( product, progress_callback=progress_callback, **kwargs, ) - if not fs_path or not record_filename: + if not fs_path or not record_filenames: if fs_path: product.location = path_to_uri(fs_path) return fs_path + record_filename = record_filenames["eodag:download_link"] + new_fs_path = os.path.join( os.path.dirname(fs_path), sanitize(product.properties["title"]) ) diff --git a/eodag/plugins/apis/usgs.py b/eodag/plugins/apis/usgs.py old mode 100644 new mode 100755 index 6549d8a158..84289c5e8e --- a/eodag/plugins/apis/usgs.py +++ b/eodag/plugins/apis/usgs.py @@ -45,6 +45,7 @@ GENERIC_COLLECTION, USER_AGENT, ProgressCallback, + deepcopy, format_dict_items, path_to_uri, ) @@ -110,6 +111,28 @@ def __init__(self, provider: str, config: PluginConfig) -> None: result_type=getattr(self.config, "result_type", "json"), ) + # parse jsonpath on init: collection specific metadata-mapping + for collection in self.config.products.keys(): + if "metadata_mapping" in self.config.products[collection].keys(): + self.config.products[collection][ + "metadata_mapping" + ] = mtd_cfg_as_conversion_and_querypath( + self.config.products[collection]["metadata_mapping"] + ) + # Complete and ready to use collection specific metadata-mapping + collection_metadata_mapping = deepcopy(self.config.metadata_mapping) + + # from current product, updated mapping at the end + for metadata, mapping in self.config.products[collection][ + "metadata_mapping" + ].items(): + collection_metadata_mapping.pop(metadata, None) + collection_metadata_mapping[metadata] = mapping + + self.config.products[collection][ + "metadata_mapping" + ] = collection_metadata_mapping + def authenticate(self) -> None: """Login to usgs api @@ -162,9 +185,9 @@ def query( 
self.authenticate() - collection_def_params = self.config.products.get( # type: ignore + collection_def_params: dict[str, Any] = self.config.products.get( collection, - self.config.products[GENERIC_COLLECTION], # type: ignore + self.config.products[GENERIC_COLLECTION], ) usgs_collection = format_dict_items(collection_def_params, **kwargs)[ "_collection" @@ -273,7 +296,7 @@ def query( result["collection"] = usgs_collection product_properties = properties_from_json( - result, self.config.metadata_mapping + result, self.get_metadata_mapping(collection) ) final.append( @@ -334,16 +357,18 @@ def download( ) kwargs["output_extension"] = kwargs.get("output_extension", output_extension) - fs_path, record_filename = self._prepare_download( + fs_path, record_filenames = self._prepare_download( product, progress_callback=progress_callback, **kwargs, ) - if not fs_path or not record_filename: + if not fs_path or not record_filenames: if fs_path: product.location = path_to_uri(fs_path) return fs_path + record_filename = record_filenames["eodag:download_link"] + self.authenticate() if "dds" in product.properties.get("downloadSystem", ""): diff --git a/eodag/plugins/download/aws.py b/eodag/plugins/download/aws.py index d66aa2e14e..689821a7af 100644 --- a/eodag/plugins/download/aws.py +++ b/eodag/plugins/download/aws.py @@ -202,8 +202,6 @@ class AwsDownload(Download): * :attr:`~eodag.config.PluginConfig.s3_endpoint` (``str``): s3 endpoint url * :attr:`~eodag.config.PluginConfig.flatten_top_dirs` (``bool``): if the directory structure should be flattened; default: ``True`` - * :attr:`~eodag.config.PluginConfig.ignore_assets` (``bool``): ignore assets and download - using ``eodag:download_link``; default: ``False`` * :attr:`~eodag.config.PluginConfig.ssl_verify` (``bool``): if the ssl certificates should be verified in requests; default: ``True`` * :attr:`~eodag.config.PluginConfig.bucket_path_level` (``int``): at which level of the @@ -255,7 +253,6 @@ def download( file or with 
environment variables. :returns: The absolute path to the downloaded product in the local filesystem """ - if progress_callback is None: logger.info( "Progress bar unavailable, please call product.download() instead of plugin.download()" @@ -263,22 +260,26 @@ def download( progress_callback = ProgressCallback(disable=True) # prepare download & create dirs (before updating metadata) - product_local_path, record_filename = self._download_preparation( + product_local_path, record_filenames = self._download_preparation( product, progress_callback=progress_callback, **kwargs ) - if not record_filename or not product_local_path: + if not record_filenames or not product_local_path: return product_local_path + asset_filter = kwargs.get("asset") + product_conf = getattr(self.config, "products", {}).get(product.collection, {}) - # do not try to build SAFE if asset filter is used - asset_filter = kwargs.get("asset") - if asset_filter: - build_safe = False - ignore_assets = False - else: - build_safe = product_conf.get("build_safe", False) - ignore_assets = getattr(self.config, "ignore_assets", False) + # do not try to build SAFE if there is an asset "eodag:download_link" or an asset filter is used + is_there_download_link = any( + assets_val.key == "eodag:download_link" + for assets_val in product.assets.get_values(asset_filter) + ) + build_safe = ( + False + if is_there_download_link or asset_filter is not None + else product_conf.get("build_safe", False) + ) # product conf overrides provider conf for "flatten_top_dirs" flatten_top_dirs = product_conf.get( @@ -291,7 +292,6 @@ def download( bucket_names_and_prefixes = self._get_bucket_names_and_prefixes( product, asset_filter, - ignore_assets, product_conf.get("complementary_url_key", []), ) @@ -330,9 +330,6 @@ def download( unique_product_chunks = self._get_unique_products( updated_bucket_names_and_prefixes, authenticated_objects, - asset_filter, - ignore_assets, - product, raise_error=raise_error, ) @@ -412,10 +409,11 @@ def 
download_chunk(product_chunk: Any) -> None: self.check_manifest_file_list(product_local_path) if asset_filter is None: - # save hash/record file - with open(record_filename, "w") as fh: - fh.write(product.remote_location) - logger.debug("Download recorded in %s", record_filename) + for asset_key, asset in product.assets.items(): + # save hash/record file + with open(record_filenames[asset_key], "w") as fh: + fh.write(asset.remote_location) + logger.debug("Download recorded in %s", record_filenames[asset_key]) product.location = path_to_uri(product_local_path) @@ -502,23 +500,23 @@ def _download_preparation( product: EOProduct, progress_callback: ProgressCallback, **kwargs: Unpack[DownloadConf], - ) -> tuple[Optional[str], Optional[str]]: + ) -> tuple[Optional[str], Optional[dict[str, str]]]: """ Preparation for the download: - - check if file was already downloaded - - get file path + - check if assets were already downloaded + - get files common path - create directories :param product: product to be downloaded :param progress_callback: progress callback to be used :param kwargs: additional arguments - :return: local path and file name + :return: local path and file names """ - product_local_path, record_filename = self._prepare_download( + product_local_path, record_filenames = self._prepare_download( product, progress_callback=progress_callback, **kwargs ) - if not product_local_path or not record_filename: + if not product_local_path or not record_filenames: if product_local_path: product.location = path_to_uri(product_local_path) return product_local_path, None @@ -529,7 +527,7 @@ def _download_preparation( # create product dest dir if not os.path.isdir(product_local_path): os.makedirs(product_local_path) - return product_local_path, record_filename + return product_local_path, record_filenames def _configure_safe_build(self, build_safe: bool, product: EOProduct): """ @@ -571,56 +569,73 @@ def _configure_safe_build(self, build_safe: bool, product: EOProduct): 
"SAFE metadata fetch format %s not implemented" % fetch_format ) + def _get_bucket_name_and_prefix( + self, product: EOProduct, url: Optional[str] = None + ) -> tuple[str, Optional[str]]: + """Extract bucket name and prefix from product URL + + :param product: The EO product to download + :param url: (optional) URL to parse, defaults to product.location when not given + :returns: bucket name (collection ``default_bucket`` setting, or ``""``, when parsing returns no bucket) and prefix + """ + if url is None: + url = product.location + + bucket_path_level = getattr(self.config, "bucket_path_level", None) + + bucket, prefix = get_bucket_name_and_prefix( + url=url, bucket_path_level=bucket_path_level + ) + + if bucket is None: + bucket = ( + getattr(self.config, "products", {}) + .get(product.collection, {}) + .get("default_bucket", "") + ) + + return bucket, prefix + def _get_bucket_names_and_prefixes( self, product: EOProduct, asset_filter: Optional[str], - ignore_assets: bool, complementary_url_keys: list[str], ) -> list[tuple[str, Optional[str]]]: """ - Retrieves the bucket names and path prefixes for the assets + Retrieves the bucket names and path prefixes for either only the asset "eodag:download_link" + or all of the assets. 
:param product: product for which the assets shall be downloaded :param asset_filter: text for which the assets should be filtered - :param ignore_assets: if product instead of individual assets should be used + :param complementary_url_keys: properties keys pointing to additional urls of content to download :return: tuples of bucket names and prefixes """ - # if assets are defined, use them instead of scanning product.location - if len(product.assets) > 0 and not ignore_assets: - if asset_filter: - filter_regex = re.compile(asset_filter) - assets_keys = getattr(product, "assets", {}).keys() - assets_keys = list(filter(filter_regex.fullmatch, assets_keys)) - filtered_assets = { - a_key: getattr(product, "assets", {})[a_key] - for a_key in assets_keys - } - assets_values = [a for a in filtered_assets.values() if "href" in a] - if not assets_values: - raise NotAvailableError( - rf"No asset key matching re.fullmatch(r'{asset_filter}') was found in {product}" - ) - else: - assets_values = product.assets.values() + bucket_names_and_prefixes = [] - bucket_names_and_prefixes = [] - for complementary_url in assets_values: - bucket_names_and_prefixes.append( - self.get_product_bucket_name_and_prefix( - product, complementary_url.get("href", "") - ) + assets_values = product.assets.get_values(asset_filter) + is_there_download_link = any( + assets_val.key == "eodag:download_link" for assets_val in assets_values + ) + + # either add only the asset "eodag:download_link" or all of the assets + if is_there_download_link: + bucket_names_and_prefixes.append( + self._get_bucket_name_and_prefix( + product, product.assets["eodag:download_link"]["href"] ) + ) else: - bucket_names_and_prefixes = [ - self.get_product_bucket_name_and_prefix(product) - ] + for asset in assets_values: + bucket_names_and_prefixes.append( + self._get_bucket_name_and_prefix(product, asset["href"]) + ) # add complementary urls try: for complementary_url_key in complementary_url_keys or []: 
bucket_names_and_prefixes.append( - self.get_product_bucket_name_and_prefix( + self._get_bucket_name_and_prefix( product, product.properties[complementary_url_key] ) ) @@ -635,9 +650,6 @@ def _get_unique_products( self, bucket_names_and_prefixes: list[tuple[str, Optional[str]]], authenticated_objects: dict[str, Any], - asset_filter: Optional[str], - ignore_assets: bool, - product: EOProduct, raise_error: bool = True, ) -> set[Any]: """ @@ -645,9 +657,6 @@ def _get_unique_products( :param bucket_names_and_prefixes: list of bucket names and corresponding path prefixes :param authenticated_objects: available objects per bucket - :param asset_filter: text for which assets should be filtered - :param ignore_assets: if product instead of individual assets should be used - :param product: product that shall be downloaded :param raise_error: raise error if there is nothing to download :return: set of product chunks that can be downloaded """ @@ -661,20 +670,6 @@ def _get_unique_products( unique_product_chunks = set(product_chunks) - # if asset_filter is used with ignore_assets, apply filtering on listed prefixes - if asset_filter and ignore_assets: - filter_regex = re.compile(asset_filter) - unique_product_chunks = set( - filter( - lambda c: filter_regex.search(os.path.basename(c.key)), - unique_product_chunks, - ) - ) - if not unique_product_chunks and raise_error: - raise NotAvailableError( - rf"No file basename matching re.fullmatch(r'{asset_filter}') was found in {product.remote_location}" - ) - if not unique_product_chunks and raise_error: raise NoMatchingCollection("No product found to download.") @@ -721,7 +716,7 @@ def _stream_download_dict( #### SAFE Archive Support: - If the collection supports SAFE structure and no `asset_regex` is specified (i.e., full product download), + If the collection supports SAFE structure and no `asset_filter` is specified (i.e., full product download), the method attempts to reconstruct a valid SAFE archive layout in the streamed 
output. :param product: The EO product to download. @@ -735,21 +730,36 @@ def _stream_download_dict( - "zip": always returns a ZIP archive. :returns: A `StreamResponse` object containing the streamed download and appropriate headers. """ - asset_regex = kwargs.get("asset") + if not getattr(product, "assets", None) or len(product.assets) == 0: + logger.error( + "No asset available to download, please check the provider configuration \ + (An asset_mapping must be added if the provide does not return any assets)!" + ) + raise MisconfiguredError( + "No asset available to download, please check the provider configuration!" + ) + + asset_filter = kwargs.get("asset") + assets_values = product.assets.get_values(asset_filter=asset_filter or "") product_conf = getattr(self.config, "products", {}).get(product.collection, {}) + # do not try to build SAFE if there is an asset "eodag:download_link" or an asset filter is used + is_there_download_link = any( + assets_val.key == "eodag:download_link" for assets_val in assets_values + ) build_safe = ( - False if asset_regex is not None else product_conf.get("build_safe", False) + False + if is_there_download_link or asset_filter is not None + else product_conf.get("build_safe", False) ) - ignore_assets = getattr(self.config, "ignore_assets", False) + # xtra metadata needed for SAFE product self._configure_safe_build(build_safe, product) - + # bucket names and prefixes bucket_names_and_prefixes = self._get_bucket_names_and_prefixes( product, - asset_regex, - ignore_assets, + asset_filter, product_conf.get("complementary_url_key", []), ) @@ -765,11 +775,7 @@ def _stream_download_dict( # downloadable files product_objects = self._get_unique_products( - bucket_names_and_prefixes, - authenticated_objects, - asset_regex, - ignore_assets, - product, + bucket_names_and_prefixes, authenticated_objects ) # check if auth is a S3 resource by verifying it has the meta.client attribute. 
@@ -794,8 +800,7 @@ def _stream_download_dict( common_path = os.path.dirname(common_path) assets_by_path = { - a.get("href", "").split("s3://")[-1]: a - for a in product.assets.get_values(asset_filter=asset_regex or "") + a.get("href", "").split("s3://")[-1]: a for a in assets_values } files_info = [] @@ -849,33 +854,6 @@ def _get_commonpath( ) return os.path.commonpath(chunk_paths) - def get_product_bucket_name_and_prefix( - self, product: EOProduct, url: Optional[str] = None - ) -> tuple[str, Optional[str]]: - """Extract bucket name and prefix from product URL - - :param product: The EO product to download - :param url: (optional) URL to use as product.location - :returns: bucket_name and prefix as str - """ - if url is None: - url = product.location - - bucket_path_level = getattr(self.config, "bucket_path_level", None) - - bucket, prefix = get_bucket_name_and_prefix( - url=url, bucket_path_level=bucket_path_level - ) - - if bucket is None: - bucket = ( - getattr(self.config, "products", {}) - .get(product.collection, {}) - .get("default_bucket", "") - ) - - return bucket, prefix - def check_manifest_file_list(self, product_path: str) -> None: """Checks if products listed in manifest.safe exist""" manifest_path_list = [ diff --git a/eodag/plugins/download/base.py b/eodag/plugins/download/base.py index f8eee36f11..e6392fdd0e 100644 --- a/eodag/plugins/download/base.py +++ b/eodag/plugins/download/base.py @@ -168,32 +168,44 @@ def _prepare_download( product: EOProduct, progress_callback: Optional[ProgressCallback] = None, **kwargs: Unpack[DownloadConf], - ) -> tuple[Optional[str], Optional[str]]: - """Check if file has already been downloaded, and prepare product download + ) -> tuple[Optional[str], Optional[dict[str, str]]]: + """Check if assets of product have already been downloaded, and prepare product download :param product: The EO product to download :param progress_callback: (optional) A progress callback - :returns: fs_path, record_filename + :returns: 
fs_path, record_filenames """ - if product.location != product.remote_location: - fs_path = uri_to_path(product.location) - # The fs path of a product is either a file (if 'extract' config is False) or a directory - if os.path.isfile(fs_path) or os.path.isdir(fs_path): - logger.info( - f"Product already present on this platform. Identifier: {fs_path}", - ) - # Do not download data if we are on site. Instead give back the absolute path to the data - return fs_path, None - - url = product.remote_location - if not url: - logger.debug( - f"Unable to get download url for {product}, skipping download", + if not getattr(product, "assets", None) or len(product.assets) == 0: + logger.error( + "No asset available to download, please check the provider configuration \ + (An asset_mapping must be added if the provide does not return any assets)!" + ) + raise MisconfiguredError( + "No asset available to download, please check the provider configuration!" ) - return None, None - logger.info( - f"Download url: {url}", - ) + + assets_values = product.assets.get_values(kwargs.get("asset")) + + if any(assets_val.key == "eodag:download_link" for assets_val in assets_values): + assets_values = product.assets["eodag:download_link"] + + already_downloaded_assets = [] + fs_path = "" + for asset_key, asset in product.assets.items(): + if asset not in assets_values: + continue + if asset.location != asset.remote_location: + fs_path = uri_to_path(asset.location) + # The fs path of a product is either a file (if 'extract' config is False) or a directory + if os.path.isfile(fs_path) or os.path.isdir(fs_path): + logger.info( + f"Asset {asset_key} already present on this platform. Identifier: {fs_path}", + ) + # Do not download data if we are on site. 
Instead give back the absolute path to the data + already_downloaded_assets.append(asset_key) + if len(already_downloaded_assets) == len(assets_values): + logger.info(f"All assets of product {product} have already been downloaded") + return os.path.dirname(fs_path), None output_dir = ( kwargs.pop("output_dir", None) @@ -231,66 +243,94 @@ def _prepare_download( logger.warning( f"Unable to create records directory. Got:\n{tb.format_exc()}", ) - url_hash = hashlib.md5(url.encode("utf-8")).hexdigest() - old_record_filename = os.path.join(download_records_dir, url_hash) - record_filename = os.path.join( - download_records_dir, self.generate_record_hash(product) - ) - if os.path.isfile(old_record_filename): - os.rename(old_record_filename, record_filename) # path with or without extension path_obj = Path(fs_path) matched_paths = list(path_obj.parent.glob(f"{path_obj.stem}.*")) fs_path_with_ext = matched_paths[0] if matched_paths else fs_path - if ( - os.path.isfile(record_filename) - and fs_path_with_ext - and os.path.isfile(fs_path_with_ext) - ): - logger.info( - f"Product already downloaded: {fs_path_with_ext}", - ) - return ( - self._finalize( - str(fs_path_with_ext), progress_callback=progress_callback, **kwargs - ), - None, - ) - elif os.path.isfile(record_filename) and os.path.isdir(fs_dir_path): - logger.info( - f"Product already downloaded: {fs_dir_path}", - ) - return ( - self._finalize( - fs_dir_path, progress_callback=progress_callback, **kwargs - ), - None, - ) - # Remove the record file if fs_path is absent (e.g. 
it was deleted while record wasn't) - elif os.path.isfile(record_filename): - logger.debug( - f"Record file found ({record_filename}) but not the actual file", - ) - logger.debug( - f"Removing record file : {record_filename}", + + record_filenames = { + asset_key: os.path.join( + download_records_dir, self.generate_records_hashs(product)[asset_key] ) - os.remove(record_filename) + for asset_key, asset in product.assets.items() + if asset not in assets_values + } + asset_paths = [] + for asset_key, asset in product.assets.items(): + if asset not in assets_values: + continue + url = asset.remote_location + url_hash = hashlib.md5(url.encode("utf-8")).hexdigest() + old_record_filename = os.path.join(download_records_dir, url_hash) + record_filename = record_filenames[asset_key] + if os.path.isfile(old_record_filename): + os.rename(old_record_filename, record_filename) - return fs_path, record_filename + if ( + os.path.isfile(record_filename) + and fs_path_with_ext + and os.path.isfile(fs_path_with_ext) + ): + logger.info( + f"Asset {asset_key} of product {product} already downloaded", + ) + asset_paths.append( + self._finalize( + fs_path, progress_callback=progress_callback, **kwargs + ) + ) - def generate_record_hash(self, product: EOProduct) -> str: - """Generate the record hash of the given product. + elif os.path.isfile(record_filename) and os.path.isdir(fs_dir_path): + logger.info( + f"Asset {asset_key} of product {product} already downloaded: {fs_dir_path}", + ) + asset_paths.append( + self._finalize( + fs_dir_path, progress_callback=progress_callback, **kwargs + ) + ) + # Remove the record file if fs_path is absent (e.g. 
it was deleted while record wasn't) + elif os.path.isfile(record_filename): + logger.debug( + f"Record file found ({record_filename}) but not the actual file", + ) + logger.debug( + f"Removing record file : {record_filename}", + ) + os.remove(record_filename) + if len(asset_paths) == len(assets_values): + logger.info(f"All assets of product {product} have already been downloaded") + return os.path.dirname(asset_paths[0]), None + + return fs_path, record_filenames + + def generate_records_hashs(self, product: EOProduct) -> dict[str, str]: + """Generate the record hash of the assets of the given product. The MD5 hash is built from the product's ``collection`` and ``properties['id']`` attributes - (``hashlib.md5((product.collection+"-"+product.properties['id']).encode("utf-8")).hexdigest()``) + and from the asset key + (``hashlib.md5((product.collection+"-"+product.properties['id']+"-"+asset.key) + .encode("utf-8")).hexdigest()``) - :param product: The product to calculate the record hash - :returns: The MD5 hash + :param product: The product to calculate the asset hashes + :returns: dict of MD5 hashes """ - # In some unit tests, `product.collection` is `None` and `product.properties["id"]` is `ìnt` - product_hash = str(product.collection) + "-" + str(product.properties["id"]) - return hashlib.md5(product_hash.encode("utf-8")).hexdigest() + asset_hashes = {} + for asset_key in product.assets: + # In some unit tests, `product.collection` is `None` and `product.properties["id"]` is `int` + asset_hash = ( + str(product.collection) + + "-" + + str(product.properties["id"]) + + "-" + + asset_key + ) + asset_hashes[asset_key] = hashlib.md5( + asset_hash.encode("utf-8") + ).hexdigest() + + return asset_hashes def _resolve_archive_depth(self, product_path: str) -> str: """Update product_path using archive_depth from provider configuration.
@@ -522,22 +562,26 @@ def download_all( progress_callback.refresh() # anticipate nested tasks to download assets in parallel for at least one product + try: + assets_values = product.assets.get_values(kwargs.get("asset", None)) + except NotAvailableError as e: + if kwargs.get("asset") is not None: + raise NotAvailableError(e).with_traceback(e.__traceback__) nested_asset_downloads = any( product for product in products if ( product.downloader and product.downloader.config.type == "AwsDownload" - or len(product.assets) > 0 - and ( - not getattr(self.config, "ignore_assets", False) - or kwargs.get("asset") is not None + or not any( + assets_val.key == "eodag:download_link" + for assets_val in assets_values ) ) ) with progress_callback as bar: - while "Loop until all products are download or timeout is reached": + while "Loop until all products are downloaded or timeout is reached": # try downloading each product in parallel before retry # Download products in batches to handle nested tasks to download assets in parallel. 
diff --git a/eodag/plugins/download/http.py b/eodag/plugins/download/http.py index df49ae9b28..f09f326cdf 100644 --- a/eodag/plugins/download/http.py +++ b/eodag/plugins/download/http.py @@ -110,9 +110,6 @@ class HTTPDownload(Download): default: ``1`` * :attr:`~eodag.config.PluginConfig.flatten_top_dirs` (``bool``): if the directory structure should be flattened; default: ``True`` - * :attr:`~eodag.config.PluginConfig.ignore_assets` (``bool``): ignore assets and download using - eodag:download_link; - default: ``False`` * :attr:`~eodag.config.PluginConfig.timeout` (``int``): time to wait until request timeout in seconds; default: ``5`` * :attr:`~eodag.config.PluginConfig.ssl_verify` (``bool``): if the ssl certificates should be verified in @@ -258,9 +255,12 @@ def order_response_process( {"json": json_response, "headers": {**response.headers}}, on_response_mm_jsonpath, ) + + # update product with available properties product.properties.update( {k: v for k, v in properties_update.items() if v != NOT_AVAILABLE} ) + # the job id becomes the product id for EcmwfSearch products if "ORDERABLE" in product.properties.get("id", ""): product.properties["id"] = product.properties.get( @@ -271,10 +271,20 @@ def order_response_process( + "_" + product.properties["id"] ) + + # if "eodag:download_link" property has been found from the response, + # update the asset "eodag:download_link" and product locations. 
+ # Then remove the property since it will not be useful anymore if "eodag:download_link" in product.properties: - product.remote_location = product.location = product.properties[ + product.assets["eodag:download_link"] = { + "href": product.properties.pop("eodag:download_link"), + "title": properties_update["title"], + "type": product.assets["eodag:download_link"]["type"], + "roles": ["data"], + } + product.remote_location = product.location = product.assets[ "eodag:download_link" - ] + ]["href"] logger.debug(f"Product location updated to {product.location}") return json_response @@ -582,12 +592,23 @@ def _request( f"Could not parse result after order success. Please search and download {product} again" ) from e - # update product + # update product with updated properties product.properties.update(properties_update) + + # if "eodag:download_link" property has been found from the response, + # update asset "eodag:download_link" and product locations. + # Then remove the property since it will not be useful anymore if "eodag:download_link" in properties_update: - product.location = product.remote_location = product.properties[ + product.assets["eodag:download_link"] = { + "href": product.properties.pop("eodag:download_link"), + "title": properties_update["title"], + "type": product.assets["eodag:download_link"]["type"], + "roles": ["data"], + } + product.location = product.remote_location = product.assets[ "eodag:download_link" - ] + ]["href"] + logger.debug(f"Product location updated to {product.location}") else: self.order_response_process(response, product) @@ -616,41 +637,49 @@ def download( ) progress_callback = ProgressCallback(disable=True) - fs_path, record_filename = self._prepare_download( + fs_path, record_filenames = self._prepare_download( product, progress_callback=progress_callback, **kwargs, ) - if not fs_path or not record_filename: + if not fs_path or not record_filenames: if fs_path: product.location = path_to_uri(fs_path) return fs_path - # 
download assets if exist instead of remote_location - if len(product.assets) > 0 and ( - not getattr(self.config, "ignore_assets", False) - or kwargs.get("asset") is not None + try: + skip_assets_download = False + assets_values = product.assets.get_values(kwargs.get("asset")) + except NotAvailableError as e: + if kwargs.get("asset") is not None: + raise NotAvailableError(e).with_traceback(e.__traceback__) + skip_assets_download = True + + # download single assets if there is no the full product asset + if not skip_assets_download and not any( + assets_val.key == "eodag:download_link" for assets_val in assets_values ): try: fs_path = self._download_assets( product, fs_path, - record_filename, + record_filenames, auth, progress_callback, executor, **kwargs, ) - if kwargs.get("asset") is None: - product.location = path_to_uri(fs_path) return fs_path except NotAvailableError as e: if kwargs.get("asset") is not None: raise NotAvailableError(e).with_traceback(e.__traceback__) - else: - pass - url = product.remote_location + if len(assets_values) > 1 and kwargs.get("asset") is not None: + logger.info("Download only the full product asset, ignoring the other ones") + + # download the full product asset + url = product.assets["eodag:download_link"].remote_location + record_filename = record_filenames["eodag:download_link"] @self._order_download_retry(product, wait, timeout) def download_request( @@ -662,7 +691,7 @@ def download_request( **kwargs: Unpack[DownloadConf], ) -> os.PathLike: is_empty = True - chunk_iterator = self._stream_download( + chunk_iterator = self._stream_download_full_product_asset( product, auth, progress_callback, **kwargs ) if fs_path is not None: @@ -786,69 +815,84 @@ def _stream_download_dict( if auth is not None and not isinstance(auth, AuthBase): raise MisconfiguredError(f"Incompatible auth plugin: {type(auth)}") + if not getattr(product, "assets", None) or len(product.assets) == 0: + logger.error( + "No asset available to download, please check 
the provider configuration \ + (An asset_mapping must be added if the provider does not return any assets)!" + ) + raise MisconfiguredError( + "No asset available to download, please check the provider configuration!" + ) + + try: + skip_assets_download = False + assets_values = product.assets.get_values(kwargs.get("asset")) + except NotAvailableError as e: + if kwargs.get("asset") is not None: + raise NotAvailableError(e).with_traceback(e.__traceback__) + skip_assets_download = True + # download assets if exist instead of remote_location - if len(product.assets) > 0 and ( - not getattr(self.config, "ignore_assets", False) - or kwargs.get("asset") is not None + if not skip_assets_download and not any( + assets_val.key == "eodag:download_link" for assets_val in assets_values ): executor = ThreadPoolExecutor( max_workers=getattr(self.config, "max_workers", None) ) - try: - assets_values = product.assets.get_values(kwargs.get("asset")) - with executor: - assets_stream_list = self._stream_download_assets( - product, - executor, - auth, - None, - assets_values, - **kwargs, - ) + with executor: + assets_stream_list = self._stream_download_assets( + product, + executor, + auth, + None, + assets_values, + **kwargs, + ) - # single asset - if len(assets_stream_list) == 1: - asset_stream = assets_stream_list[0] - if assets_values[0].get("type"): - asset_stream.headers["content-type"] = assets_values[0]["type"] - return asset_stream + # single asset + if len(assets_stream_list) == 1: + asset_stream = assets_stream_list[0] + if assets_values[0].get("type"): + asset_stream.headers["content-type"] = assets_values[0]["type"] + return asset_stream - # multiple assets in zip - else: - outputs_filename = ( - sanitize(product.properties["title"]) - if "title" in product.properties - else sanitize(product.properties.get("id", "download")) - ) + # multiple assets in zip + else: + outputs_filename = ( + sanitize(product.properties["title"]) + if "title" in product.properties + else 
sanitize(product.properties.get("id", "download")) + ) - # do not use global size if one of the assets has no size - missing_length = any(not (asset.size) for asset in assets_values) + # do not use global size if one of the assets has no size + missing_length = any(not (asset.size) for asset in assets_values) - zip_stream = ( - ZipStream(sized=True) if not missing_length else ZipStream() + zip_stream = ( + ZipStream(sized=True) if not missing_length else ZipStream() + ) + for asset_stream in assets_stream_list: + zip_stream.add( + asset_stream.content, + arcname=asset_stream.arcname, + size=asset_stream.size, ) - for asset_stream in assets_stream_list: - zip_stream.add( - asset_stream.content, - arcname=asset_stream.arcname, - size=asset_stream.size, - ) - zip_length = len(zip_stream) if not missing_length else None + zip_length = len(zip_stream) if not missing_length else None - return StreamResponse( - content=zip_stream, - media_type="application/zip", - filename=f"{outputs_filename}.zip", - size=zip_length, - ) - except NotAvailableError as e: - if kwargs.get("asset") is not None: - raise NotAvailableError(e).with_traceback(e.__traceback__) - else: - pass + return StreamResponse( + content=zip_stream, + media_type="application/zip", + filename=f"{outputs_filename}.zip", + size=zip_length, + ) + + if len(assets_values) > 1 and kwargs.get("asset") is not None: + logger.info("Download only the full product asset, ignoring the other ones") - chunk_iterator = self._stream_download(product, auth, None, **kwargs) + # download the full product asset + chunk_iterator = self._stream_download_full_product_asset( + product, auth, None, **kwargs + ) # start reading chunks to set product.headers try: @@ -957,7 +1001,7 @@ def order( product, auth ) - def _stream_download( + def _stream_download_full_product_asset( self, product: EOProduct, auth: Optional[AuthBase] = None, @@ -965,9 +1009,9 @@ def _stream_download( **kwargs: Unpack[DownloadConf], ) -> Iterator[Any]: """ - 
Fetches a zip file containing the assets of a given product as a stream - and returns a generator yielding the chunks of the file - + Fetches the zip file of the full product asset containing the assets + of a given product as a stream and returns a generator yielding the + chunks of the file :param product: product for which the assets should be downloaded :param auth: The configuration of a plugin of type Authentication :param progress_callback: A method or a callable object @@ -995,7 +1039,7 @@ def _stream_download( product.properties.get("eodag:download_method", "").lower() or getattr(self.config, "method", "GET").lower() ) - url = product.remote_location + url = product.assets["eodag:download_link"].remote_location if req_method == "post": # separate url & parameters parts = urlparse(url) @@ -1222,7 +1266,7 @@ def _download_assets( self, product: EOProduct, fs_dir_path: str, - record_filename: str, + record_filenames: dict[str, str], auth: Optional[AuthBase] = None, progress_callback: Optional[ProgressCallback] = None, executor: Optional[ThreadPoolExecutor] = None, @@ -1240,10 +1284,10 @@ def _download_assets( self._config_executor(executor) assets_urls = [ - a["href"] for a in getattr(product, "assets", {}).values() if "href" in a + a["href"] + for a in getattr(product, "assets", {}).values() + if "href" in a and a.key != "eodag:download_link" ] - if not assets_urls: - raise NotAvailableError("No assets available for %s" % product) assets_values = product.assets.get_values(kwargs.get("asset")) @@ -1341,10 +1385,11 @@ def download_asset(asset_stream: StreamResponse) -> None: flatten_top_directories(fs_dir_path) if kwargs.get("asset") is None: - # save hash/record file - with open(record_filename, "w") as fh: - fh.write(product.remote_location) - logger.debug("Download recorded in %s", record_filename) + for asset_key, asset in product.assets.items(): + # save hash/record file + with open(record_filenames[asset_key], "w") as fh: + 
fh.write(asset.remote_location) + logger.debug("Download recorded in %s", record_filenames[asset_key]) return fs_dir_path diff --git a/eodag/plugins/manager.py b/eodag/plugins/manager.py index 0acf4ed9a5..bb0620c2da 100644 --- a/eodag/plugins/manager.py +++ b/eodag/plugins/manager.py @@ -250,12 +250,19 @@ def get_auth_plugin( :returns: The Authentication plugin """ # matching url from product to download - if product is not None and len(product.assets) > 0: - matching_url = next(iter(product.assets.values()))["href"] + + # check url of asset "eodag:download_link" first since other assets + # can have paths not matching plugin matching_url pattern + if ( + product is not None + and (assets_values := product.assets.values()) + and any( + assets_val.key == "eodag:download_link" for assets_val in assets_values + ) + ): + product.assets["eodag:download_link"]["href"] elif product is not None: - matching_url = product.properties.get( - "eodag:download_link" - ) or product.properties.get("eodag:order_link") + matching_url = next(iter(assets_values))["href"] else: # search auth matching_url = getattr(associated_plugin.config, "api_endpoint", None) diff --git a/eodag/resources/providers.yml b/eodag/resources/providers.yml old mode 100644 new mode 100755 index 967ada43c0..469032b2bf --- a/eodag/resources/providers.yml +++ b/eodag/resources/providers.yml @@ -43,10 +43,18 @@ eodag:thumbnail: '$.browse[0].thumbnailPath' eodag:quicklook: '$.browse[0].browsePath' order:status: '{$.available#get_group_name((?PTrue)|(?PFalse))}' - eodag:download_link: 'https://earthexplorer.usgs.gov/download/external/options/{_collection}/{entityId}/M2M/' # metadata needed for download usgs:entityId: '$.entityId' usgs:productId: '$.id' + # create asset "eodag:download_link" using the above metadata + assets: '{$.null#dict_update( + [["eodag:download_link",[ + ["href","https://earthexplorer.usgs.gov/download/external/options/{productType}/{entityId}/M2M/"], + ["title","Full product download"], + 
["type","application/gzip"], + ["roles",["data"]] + ]]] + )}' extract: True order_enabled: true max_workers: 2 @@ -71,6 +79,15 @@ _collection: landsat_etm_c2_l2 S2_MSI_L1C: _collection: SENTINEL_2A + metadata_mapping: + assets: '{$.null#dict_update( + [["eodag:download_link",[ + ["href","https://earthexplorer.usgs.gov/download/external/options/{productType}/{entityId}/M2M/"], + ["title","Full product download"], + ["type","application/zip"], + ["roles",["data"]] + ]]] + )}' GENERIC_COLLECTION: _collection: '{collection}' @@ -618,6 +635,14 @@ # The url to download the product "as is" (literal or as a template to be completed either after the search result # is obtained from the provider or during the eodag download phase) eodag:download_link: '$.properties.services.download.url' + assets: '{$.null#dict_update( + [["eodag:download_link",[ + ["href","{eodag:download_link}"], + ["title","Full product download"], + ["type","application/zip"], + ["roles",["data"]] + ]]] + )}' # order:status: must be one of succeeded, ordered, orderable order:status: '{$.properties.storage.mode#get_group_name((?Pdisk|tier2)|(?Pstaging)|(?Punknown|tape|tier3))}' @@ -653,6 +678,7 @@ order_enabled: true auth_error_code: 401 ssl_verify: true + timeout: 60 dl_url_params: issuerId: peps auth: !plugin @@ -1528,7 +1554,6 @@ download: !plugin type: AwsDownload s3_endpoint: https://storage.googleapis.com - ignore_assets: True ssl_verify: true products: S2_MSI_L1C: @@ -1650,6 +1675,15 @@ - $.geometry qs: $.qs eodag:order_link: 'https://ads.atmosphere.copernicus.eu/api/retrieve/v1/processes/{dataset}/execution?{{"inputs": {qs#to_geojson}}}' + assets: '{$.null#dict_update( + [["eodag:download_link",[ + ["href","Not Available"], + ["title","Full product download"], + ["type","application/zip"], + ["roles",["data"]] + ]]] + )}' + products: # See available Public Datasets in https://ads.atmosphere.copernicus.eu/cdsapp#!/search?type=dataset CAMS_GAC_FORECAST: @@ -1817,6 +1851,14 @@ geometry: - '{{"area": 
{geometry#to_nwse_bounds}}}' - $.geometry + assets: '{$.null#dict_update( + [["eodag:download_link",[ + ["href","Not Available"], + ["title","Full product download"], + ["type","application/zip"], + ["roles",["data"]] + ]]] + )}' qs: $.qs eodag:order_link: 'https://cds.climate.copernicus.eu/api/retrieve/v1/processes/{dataset}/execution?{{"inputs": {qs#to_geojson}}}' products: @@ -3149,6 +3191,14 @@ eodag:download_link: '$.properties.location' eodag:quicklook: '$.properties.thumbnail' eodag:thumbnail: '$.properties.thumbnail' + assets: '{$.null#dict_update( + [["eodag:download_link",[ + ["href","{eodag:download_link}"], + ["title","Full product download"], + ["type","application/zip"], + ["roles",["data"]] + ]]] + )}' title: '$.id' order:status: 'orderable' processing:level: @@ -3875,6 +3925,14 @@ - '{{"variable": "{variable}"}}' - '{$.properties.location#get_variables_from_path}' eodag:download_link: '$.properties.location' + assets: '{$.null#dict_update( + [["eodag:download_link",[ + ["href","{eodag:download_link}"], + ["title","Full product download"], + ["type","application/zip"], + ["roles",["data"]] + ]]] + )}' title: '$.id' order:status: 'orderable' eodag:order_link: 'https://gateway.prod.wekeo2.eu/hda-broker/api/v1/dataaccess/download?{{"location": "{eodag:download_link}","product_id":"{id}", "cacheable": "true", "dataset_id": "productType"}}' @@ -4867,7 +4925,14 @@ _order_body: '$.links[?rel=="retrieve"].body' eodag:order_link: "{_order_href}?{{{_order_body#replace_str(\"'\", '\"')}}}" eodag:download_link: '$.null' - assets: '$.null' + assets: '{$.null#dict_update( + [["eodag:download_link",[ + ["href","{eodag:order_link}"], + ["title","Full product download"], + ["type","application/zip"], + ["roles",["data"]] + ]]] + )}' search: !plugin type: StacSearch api_endpoint: https://hda.data.destination-earth.eu/stac/v2/search @@ -4879,7 +4944,14 @@ eodag:quicklook: '{eodag:thumbnail}' order:status: 
'{$.properties."order:status"#get_group_name((?Psucceeded)|(?Pshipping)|(?Porderable))}' eodag:download_link: '$.assets.downloadLink.href' - assets: '$.null' + assets: '{$.null#dict_update( + [["eodag:download_link",[ + ["href","{eodag:download_link}"], + ["title","Full product download"], + ["type","application/zip"], + ["roles",["data"]] + ]]] + )}' discover_collections: fetch_url: 'https://hda.data.destination-earth.eu/stac/v2/collections' result_type: json @@ -5509,7 +5581,14 @@ eodag:download_link: '$.properties.links.data[?(@.title="Product download")].href' # order:status set to succeeded for consistency between providers order:status: '{$.null#replace_str("Not Available","succeeded")}' - assets: '{$.properties.links.sip-entries#assets_list_to_dict}' + assets: '{$.properties.links.sip-entries#assets_list_to_dict_and_update( + [["eodag:download_link",[ + ["href","{eodag:download_link}"], + ["title","Full product download"], + ["type","application/zip"], + ["roles",["data"]] + ]]] + )}' # Additional metadata provided by the providers but that don't appear in the reference spec size: '$.properties.productInformation.size' type: '$.null' @@ -5868,7 +5947,6 @@ download: !plugin type: HTTPDownload extract: true - ignore_assets: True ssl_verify: true auth: !plugin type: TokenAuth @@ -6173,7 +6251,6 @@ _collection: '{collection}' download: !plugin type: HTTPDownload - ignore_assets: true archive_depth: 2 auth_error_code: - 401 @@ -6308,7 +6385,6 @@ download: !plugin type: AwsDownload s3_endpoint: https://s3.datalake.cnes.fr - ignore_assets: True auth: !plugin type: AwsAuth auth_error_code: 403 diff --git a/tests/integration/test_core_search_results.py b/tests/integration/test_core_search_results.py index 64629d5db4..be00afcca7 100644 --- a/tests/integration/test_core_search_results.py +++ b/tests/integration/test_core_search_results.py @@ -433,7 +433,7 @@ def test_core_search_with_count(self, mock_urlopen): }, "collection": "foo-collection", "assets": { - 
"downloadLink": { + "eodag:download_link": { "title": "Download link", "href": "https://stac-fastapi-eodag-server/download-link", "type": "application/zip", diff --git a/tests/units/test_assets.py b/tests/units/test_assets.py new file mode 100644 index 0000000000..47c37ddcb7 --- /dev/null +++ b/tests/units/test_assets.py @@ -0,0 +1,26 @@ +from unittest import mock + +from eodag.api.product._assets import Asset +from tests import EODagTestCase + + +class TestAssets(EODagTestCase): + def test_asset_register_downloader(self): + """asset.register_downloader must set download and auth plugins""" + product = self._dummy_product() + asset = Asset( + product=product, + key="a1", + **{"title": "a1", "href": "https://assets.test.com/a1"} + ) + + self.assertIsNone(asset.downloader) + self.assertIsNone(asset.downloader_auth) + + downloader = mock.MagicMock() + downloader_auth = mock.MagicMock() + + asset.register_downloader(downloader, downloader_auth) + + self.assertEqual(asset.downloader, downloader) + self.assertEqual(asset.downloader_auth, downloader_auth)