Skip to content

Commit 66d71ab

Browse files
micafer and J535D165 authored
Add B2Share support #88 (#89)
Co-authored-by: Jonathan de Bruin <[email protected]>
1 parent c0abca0 commit 66d71ab

File tree

3 files changed

+27
-1
lines changed

3 files changed

+27
-1
lines changed

datahugger/config.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
from datahugger.services import ArXivDataset
2+
from datahugger.services import B2shareDataset
23
from datahugger.services import DataDryadDataset
34
from datahugger.services import DataEuropaDataset
45
from datahugger.services import DataOneDataset
@@ -118,6 +119,7 @@
118119
"trolling.uit.no": DataverseDataset,
119120
"www.sodha.be": DataverseDataset,
120121
"www.uni-hildesheim.de": DataverseDataset,
122+
"b2share.eudat.eu": B2shareDataset,
121123
"data.europa.eu": DataEuropaDataset,
122124
}
123125

datahugger/services.py

Lines changed: 21 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -320,7 +320,7 @@ class MendeleyDataset(DatasetDownloader):
320320
class OSFDataset(DatasetDownloader):
321321
"""Downloader for OSF repository."""
322322

323-
REGEXP_ID = r"osf\.io\/(?P<record_id>.*)/"
323+
REGEXP_ID = r"osf\.io\/(?P<record_id>[^\/]*)\/{0,1}"
324324

325325
# the base entry point of the REST API
326326
API_URL = "https://api.osf.io/v2/nodes/"
@@ -425,3 +425,23 @@ class SeaNoeDataset(DatasetDownloader):
425425
ATTR_SIZE_JSONPATH = "size"
426426
ATTR_HASH_JSONPATH = "checksum"
427427
ATTR_HASH_TYPE_VALUE = "sha256"
428+
429+
430+
class B2shareDataset(DatasetDownloader):
431+
"""Downloader for B2Share repository."""
432+
433+
REGEXP_ID = r"b2share\.eudat\.eu\/records\/(?P<record_id>[0-9a-z]+)"
434+
435+
# the base entry point of the REST API
436+
API_URL = "https://b2share.eudat.eu/api/"
437+
438+
# the files and metadata about the dataset
439+
API_URL_META = "{api_url}records/{record_id}"
440+
META_FILES_JSONPATH = "files[*]"
441+
442+
# paths to file attributes
443+
ATTR_NAME_JSONPATH = "key"
444+
ATTR_FILE_LINK_JSONPATH = "ePIC_PID"
445+
ATTR_SIZE_JSONPATH = "size"
446+
ATTR_HASH_JSONPATH = "checksum"
447+
ATTR_HASH_TYPE_VALUE = "md5"

tests/test_repositories.toml

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -114,6 +114,10 @@ files = "AA_age.tab"
114114
location = "https://github.com/j535d165/cbsodata"
115115
files = "cbsodata-main/README.md"
116116

117+
[[b2share]]
118+
location = "https://b2share.eudat.eu/records/db2ef5890fa44c7a85af366a50de73b9"
119+
files = "2024-02-13.sav"
120+
117121
[[dataeuropa]]
118122
location = "https://data.europa.eu/data/datasets/65e092e4009f18f050b14216"
119123
files = "consolidation-wattzhub-schema-irve-statique-20240220-152202.csv"

0 commit comments

Comments
 (0)