import csv
import json
import collections
+import sys

-ACCESS_TOKEN = 'SvxcV0O7kHohjkBVHcHZ3iZmgtJvKeZPN85ZFtgrc5wa0Uup1MtYWl2HzWTw'
-dats_folder = '/Users/amd176/Documents/Repositories/digital-commons/src/scripts/convert_to_dats/output/spew_ipums_dats_json/'
+if len(sys.argv) < 3:
+    print('DESCRIPTION:\n\tUpdate the DATS metadata with the DOI and PURL\n\tA Zenodo access token and directory path are required\n')
+    print('USAGE:\n\tpython update_dats_with_doi.py <access_token> <path_to_dats_directory>\n')
+else:
+    ACCESS_TOKEN = sys.argv[1]
+    dats_folder = sys.argv[2]

-data = csv.reader(open('spew_mapping.csv'))
-# Read the column names from the first line of the file
-fields = next(data)
-csv_dict = {}
-for row in data:
-    # Zip together the field names and values
-    items = zip(fields, row)
-    item = {}
-    key = ()
-    # Add the value to our dictionary
-    for (name, value) in items:
-        item[name] = value.strip()
+    data = csv.reader(open('spew_mapping.csv'))
+    # Read the column names from the first line of the file
+    fields = next(data)
+    csv_dict = {}
+    for row in data:
+        # Zip together the field names and values
+        items = zip(fields, row)
+        item = {}
+        key = ()
+        # Add the value to our dictionary
+        for (name, value) in items:
+            item[name] = value.strip()

-    key = item['Title']
-    csv_dict[key] = item
+        key = item['Title']
+        csv_dict[key] = item

-response = requests.get('https://zenodo.org//api/deposit/depositions', params={'access_token': ACCESS_TOKEN, 'size': 200, 'status': 'published'})
-json_response = response.json()
-for deposition_index in range(len(json_response)):
-    id = json_response[deposition_index]['id']
+    response = requests.get('https://zenodo.org//api/deposit/depositions', params={'access_token': ACCESS_TOKEN, 'size': 200, 'status': 'published'})
+    json_response = response.json()
+    for deposition_index in range(len(json_response)):
+        id = json_response[deposition_index]['id']

-    r = requests.get("https://zenodo.org/api/deposit/depositions/" + str(id),
-                     params={'access_token': ACCESS_TOKEN})
+        r = requests.get("https://zenodo.org/api/deposit/depositions/" + str(id),
+                         params={'access_token': ACCESS_TOKEN})

-    deposition_json = r.json()
+        deposition_json = r.json()

-    # Get download link for access url
-    access_url = "https://zenodo.org/record/" + str(deposition_json['record_id']) + "/files/" + deposition_json['files'][0]['filename']
+        # Get download link for access url
+        access_url = "https://zenodo.org/record/" + str(deposition_json['record_id']) + "/files/" + deposition_json['files'][0]['filename']

-    # Get title to cross reference with spew_mapping.csv
-    title = deposition_json['title']
-    if not "RABIES" in title.upper() and not "H1N1" in title:
-        try:
-            landing_url = "http://w3id.org/spew/" + csv_dict[title]['Anonymous Identifier']
-        except KeyError:
-            continue
+        # Get title to cross reference with spew_mapping.csv
+        title = deposition_json['title']
+        if not "RABIES" in title.upper() and not "H1N1" in title:
+            try:
+                landing_url = "http://w3id.org/spew/" + csv_dict[title]['Anonymous Identifier']
+            except KeyError:
+                continue

-        # Extract the name from the landing page in spew_mapping, this will allow us to access the json file
-        file_name = ()
-        old_landing_page = csv_dict[title]['Landing Page'].split('/')
-        if len(old_landing_page) > 10:
-            file_name = old_landing_page[8] + ".json"
-        else:
-            file_name = old_landing_page[7] + ".json"
+            # Extract the name from the landing page in spew_mapping, this will allow us to access the json file
+            file_name = ()
+            old_landing_page = csv_dict[title]['Landing Page'].split('/')
+            if len(old_landing_page) > 10:
+                file_name = old_landing_page[8] + ".json"
+            else:
+                file_name = old_landing_page[7] + ".json"

-        # Update the dats file with the correct identifier information and the access and landing URLs
-        try:
-            with open(dats_folder + file_name) as json_file:
-                old_meta_data = json.load(json_file, object_pairs_hook=collections.OrderedDict)
-        except FileNotFoundError:
-            continue
+            # Update the dats file with the correct identifier information and the access and landing URLs
+            try:
+                with open(dats_folder + file_name) as json_file:
+                    old_meta_data = json.load(json_file, object_pairs_hook=collections.OrderedDict)
+            except FileNotFoundError:
+                continue


-        old_meta_data['identifier']['identifier'] = deposition_json['doi_url']
-        old_meta_data['identifier']['identifierSource'] = "zenodo"
-        old_meta_data['distributions'][0]['access']['accessURL'] = access_url
-        old_meta_data['distributions'][0]['access']['landingPage'] = landing_url
+            old_meta_data['identifier']['identifier'] = deposition_json['doi_url']
+            old_meta_data['identifier']['identifierSource'] = "zenodo"
+            old_meta_data['distributions'][0]['access']['accessURL'] = access_url
+            old_meta_data['distributions'][0]['access']['landingPage'] = landing_url

-        with open(dats_folder + file_name, 'w') as outfile:
-            json.dump(old_meta_data, outfile, indent=4)
-            print("created " + file_name)
+            with open(dats_folder + file_name, 'w') as outfile:
+                json.dump(old_meta_data, outfile, indent=4)
+                print("created " + file_name)
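For reference, the script assumes each DATS JSON file in <path_to_dats_directory> already contains the fields it rewrites. A minimal sketch of that fragment, inferred only from the keys accessed above; the DOI, record id, and URL values are hypothetical placeholders, and real DATS files carry many more fields:

import json

# Minimal DATS fragment limited to the fields update_dats_with_doi.py touches.
# All values are placeholders; the script overwrites them per Zenodo deposition.
dats_fragment = {
    "identifier": {
        "identifier": "https://doi.org/10.5281/zenodo.0000000",  # filled from deposition_json['doi_url']
        "identifierSource": "zenodo"
    },
    "distributions": [{
        "access": {
            "accessURL": "https://zenodo.org/record/0000000/files/example.zip",  # Zenodo download link
            "landingPage": "http://w3id.org/spew/EXAMPLE-ID"                     # w3id PURL
        }
    }]
}

print(json.dumps(dats_fragment, indent=4))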