Skip to content

Commit 9219a44

Browse files
author
mas400
committed
updated scripts to use command line arguments
1 parent 9940453 commit 9219a44

File tree

3 files changed

+223
-206
lines changed

3 files changed

+223
-206
lines changed

src/scripts/dats_to_doi/create_spew_mapping.py

Lines changed: 33 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -4,40 +4,47 @@
44
import json
55
import csv
66
import re
7+
import sys
78

8-
dats_folder = 'DATS FOLDER LOCATION'
9-
if os.path.isfile('spew_mapping.csv'):
10-
file = open('spew_mapping.csv', 'a+')
11-
interval = sum(1 for line in open('spew_mapping.csv'))-1
12-
wr = csv.writer(file, quoting=csv.QUOTE_ALL)
139

10+
if len(sys.argv) < 2:
11+
print('DESCRIPTION:\n\tReads SPEW metadata and creates a CSV mapping of the location code, SPEW version, and landing page URL to an anonymous identifier\n\tA directory path is required\n')
12+
print('USAGE:\n\tpython create_spew_mapping.py <path_to_dats_directory>\n')
1413
else:
15-
interval = 0
16-
file = open('spew_mapping.csv', 'w')
17-
wr = csv.writer(file, quoting=csv.QUOTE_ALL)
18-
wr.writerow(['Apollo Location Code', 'SPEW Version', 'Landing Page', 'Anonymous Identifier', 'Title'])
14+
dats_folder = sys.argv[1]
1915

20-
for filename in os.listdir(dats_folder):
21-
if filename.endswith(".json"):
22-
interval += 1
16+
if os.path.isfile('spew_mapping.csv'):
17+
file = open('spew_mapping.csv', 'a+')
18+
interval = sum(1 for line in open('spew_mapping.csv'))-1
19+
wr = csv.writer(file, quoting=csv.QUOTE_ALL)
2320

24-
# Read metadata as json
25-
with open(os.path.join(dats_folder, filename)) as json_file:
26-
json_data = json.load(json_file);
21+
else:
22+
interval = 0
23+
file = open('spew_mapping.csv', 'w')
24+
wr = csv.writer(file, quoting=csv.QUOTE_ALL)
25+
wr.writerow(['Apollo Location Code', 'SPEW Version', 'Landing Page', 'Anonymous Identifier', 'Title'])
2726

28-
# Get title
29-
title = json_data['title']
27+
for filename in os.listdir(dats_folder):
28+
if filename.endswith(".json"):
29+
interval += 1
3030

31-
# Get landing page
32-
landing_page = json_data['distributions'][0]['access']['landingPage']
31+
# Read metadata as json
32+
with open(os.path.join(dats_folder, filename)) as json_file:
33+
json_data = json.load(json_file);
3334

34-
# Get apollo location code
35-
ls_url = json_data['spatialCoverage'][0]['identifier']['identifier']
36-
location_code = int(re.search(r'\d+', ls_url).group())
35+
# Get title
36+
title = json_data['title']
3737

38-
# Get spew version
39-
version = json_data['types'][2]['platform']['value']
38+
# Get landing page
39+
landing_page = json_data['distributions'][0]['access']['landingPage']
4040

41-
wr.writerow([location_code, version, landing_page, str(interval).zfill(7), title])
41+
# Get apollo location code
42+
ls_url = json_data['spatialCoverage'][0]['identifier']['identifier']
43+
location_code = int(re.search(r'\d+', ls_url).group())
4244

43-
file.close()
45+
# Get spew version
46+
version = json_data['types'][2]['platform']['value']
47+
48+
wr.writerow([location_code, version, landing_page, str(interval).zfill(7), title])
49+
50+
file.close()

src/scripts/dats_to_doi/update_dats_with_doi.py

Lines changed: 57 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -2,68 +2,73 @@
22
import csv
33
import json
44
import collections
5+
import sys
56

6-
ACCESS_TOKEN = 'SvxcV0O7kHohjkBVHcHZ3iZmgtJvKeZPN85ZFtgrc5wa0Uup1MtYWl2HzWTw'
7-
dats_folder = '/Users/amd176/Documents/Repositories/digital-commons/src/scripts/convert_to_dats/output/spew_ipums_dats_json/'
7+
if len(sys.argv) < 3:
8+
print('DESCRIPTION:\n\tUpdate the DATS metadata with the DOI and PURL\n\tA Zenodo access token and directory path are required\n')
9+
print('USAGE:\n\tpython update_dats_with_doi.py <access_token> <path_to_dats_directory>\n')
10+
else:
11+
ACCESS_TOKEN = sys.argv[1]
12+
dats_folder = sys.argv[2]
813

9-
data = csv.reader(open('spew_mapping.csv'))
10-
# Read the column names from the first line of the file
11-
fields = next(data)
12-
csv_dict = {}
13-
for row in data:
14-
# Zip together the field names and values
15-
items = zip(fields, row)
16-
item = {}
17-
key = ()
18-
# Add the value to our dictionary
19-
for (name, value) in items:
20-
item[name] = value.strip()
14+
data = csv.reader(open('spew_mapping.csv'))
15+
# Read the column names from the first line of the file
16+
fields = next(data)
17+
csv_dict = {}
18+
for row in data:
19+
# Zip together the field names and values
20+
items = zip(fields, row)
21+
item = {}
22+
key = ()
23+
# Add the value to our dictionary
24+
for (name, value) in items:
25+
item[name] = value.strip()
2126

22-
key = item['Title']
23-
csv_dict[key] = item
27+
key = item['Title']
28+
csv_dict[key] = item
2429

25-
response = requests.get('https://zenodo.org//api/deposit/depositions', params={'access_token': ACCESS_TOKEN, 'size': 200, 'status': 'published'})
26-
json_response = response.json()
27-
for deposition_index in range(len(json_response)):
28-
id = json_response[deposition_index]['id']
30+
response = requests.get('https://zenodo.org//api/deposit/depositions', params={'access_token': ACCESS_TOKEN, 'size': 200, 'status': 'published'})
31+
json_response = response.json()
32+
for deposition_index in range(len(json_response)):
33+
id = json_response[deposition_index]['id']
2934

30-
r = requests.get("https://zenodo.org/api/deposit/depositions/" + str(id),
31-
params={'access_token': ACCESS_TOKEN})
35+
r = requests.get("https://zenodo.org/api/deposit/depositions/" + str(id),
36+
params={'access_token': ACCESS_TOKEN})
3237

33-
deposition_json = r.json()
38+
deposition_json = r.json()
3439

35-
# Get download link for access url
36-
access_url = "https://zenodo.org/record/" + str(deposition_json['record_id']) + "/files/" + deposition_json['files'][0]['filename']
40+
# Get download link for access url
41+
access_url = "https://zenodo.org/record/" + str(deposition_json['record_id']) + "/files/" + deposition_json['files'][0]['filename']
3742

38-
# Get title to cross reference with spew_mapping.csv
39-
title = deposition_json['title']
40-
if not "RABIES" in title.upper() and not "H1N1" in title:
41-
try:
42-
landing_url = "http://w3id.org/spew/" + csv_dict[title]['Anonymous Identifier']
43-
except KeyError:
44-
continue
43+
# Get title to cross reference with spew_mapping.csv
44+
title = deposition_json['title']
45+
if not "RABIES" in title.upper() and not "H1N1" in title:
46+
try:
47+
landing_url = "http://w3id.org/spew/" + csv_dict[title]['Anonymous Identifier']
48+
except KeyError:
49+
continue
4550

46-
# Extract the name from the landing page in spew_mapping; this will allow us to access the JSON file
47-
file_name = ()
48-
old_landing_page = csv_dict[title]['Landing Page'].split('/')
49-
if len(old_landing_page) > 10:
50-
file_name = old_landing_page[8] + ".json"
51-
else:
52-
file_name = old_landing_page[7] + ".json"
51+
# Extract the name from the landing page in spew_mapping; this will allow us to access the JSON file
52+
file_name = ()
53+
old_landing_page = csv_dict[title]['Landing Page'].split('/')
54+
if len(old_landing_page) > 10:
55+
file_name = old_landing_page[8] + ".json"
56+
else:
57+
file_name = old_landing_page[7] + ".json"
5358

54-
# Update the dats file with the correct identifier information and the access and landing URLs
55-
try:
56-
with open(dats_folder+file_name) as json_file:
57-
old_meta_data = json.load(json_file, object_pairs_hook=collections.OrderedDict)
58-
except FileNotFoundError:
59-
continue
59+
# Update the dats file with the correct identifier information and the access and landing URLs
60+
try:
61+
with open(dats_folder+file_name) as json_file:
62+
old_meta_data = json.load(json_file, object_pairs_hook=collections.OrderedDict)
63+
except FileNotFoundError:
64+
continue
6065

6166

62-
old_meta_data['identifier']['identifier'] = deposition_json['doi_url']
63-
old_meta_data['identifier']['identifierSource'] = "zenodo"
64-
old_meta_data['distributions'][0]['access']['accessURL'] = access_url
65-
old_meta_data['distributions'][0]['access']['landingPage'] = landing_url
67+
old_meta_data['identifier']['identifier'] = deposition_json['doi_url']
68+
old_meta_data['identifier']['identifierSource'] = "zenodo"
69+
old_meta_data['distributions'][0]['access']['accessURL'] = access_url
70+
old_meta_data['distributions'][0]['access']['landingPage'] = landing_url
6671

67-
with open(dats_folder+file_name, 'w') as outfile:
68-
json.dump(old_meta_data, outfile, indent=4)
69-
print("created " + file_name)
72+
with open(dats_folder+file_name, 'w') as outfile:
73+
json.dump(old_meta_data, outfile, indent=4)
74+
print("created " + file_name)

0 commit comments

Comments
 (0)