Skip to content

Commit 9578e92

Browse files
Merge pull request #6 from AustralianBioCommons/feature-add-logging
Feature add logging
2 parents 797dd95 + 18f722a commit 9578e92

File tree

4 files changed

+122
-12
lines changed

4 files changed

+122
-12
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
__pycache__/
33
*.py[cod]
44
*$py.class
5+
logs/
6+
log/
57

68
# C extensions
79
*.so

example_notebook.ipynb

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,16 +11,18 @@
1111
},
1212
{
1313
"cell_type": "code",
14-
"execution_count": 1,
14+
"execution_count": 2,
1515
"metadata": {},
1616
"outputs": [],
1717
"source": [
18-
"from gen3_metadata.gen3_metadata_parser import Gen3MetadataParser"
18+
"from gen3_metadata.logger import setup_logger\n",
19+
"from gen3_metadata.gen3_metadata_parser import Gen3MetadataParser\n",
20+
"setup_logger()"
1921
]
2022
},
2123
{
2224
"cell_type": "code",
23-
"execution_count": 2,
25+
"execution_count": 3,
2426
"metadata": {},
2527
"outputs": [],
2628
"source": [
@@ -47,7 +49,7 @@
4749
"source": [
4850
"# fetching data and returning as dataframe\n",
4951
"program_name= \"program1\"\n",
50-
"project_code= \"AusDiab_Simulated\"\n",
52+
"project_code= \"project1\"\n",
5153
"gen3metadata.fetch_data_pd(program_name, project_code, node_label= \"medical_history\")"
5254
]
5355
},

src/gen3_metadata/gen3_metadata_parser.py

Lines changed: 80 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,37 +3,48 @@
33
import pandas as pd
44
import jwt
55
import re
6+
import logging
67

78

89
class Gen3MetadataParser:
910
"""
1011
A class to interact with Gen3 metadata API for fetching and processing data.
1112
"""
1213

13-
def __init__(self, key_file_path):
14+
def __init__(self, key_file_path, logger=None):
1415
"""
1516
Initializes the Gen3MetadataParser with API URL and key file path.
1617
1718
Args:
1819
key_file_path (str): The file path to the JSON key file for authentication.
20+
logger (logging.Logger, optional): Logger instance to use. If None, uses default.
1921
"""
2022
self.key_file_path = key_file_path
2123
self.headers = {}
2224
self.data_store = {}
2325
self.data_store_pd = {}
24-
26+
if logger is None:
27+
self.logger = logging.getLogger("gen3_metadata")
28+
else:
29+
self.logger = logger
30+
self.logger.info(f"Initialized Gen3MetadataParser with key file: {key_file_path}")
31+
2532
def _add_quotes_to_json(self, input_str):
2633
try:
2734
# Try parsing as-is
35+
self.logger.debug("Attempting to parse JSON as-is.")
2836
return json.loads(input_str)
2937
except json.JSONDecodeError:
38+
self.logger.warning("JSON decode failed, attempting to fix missing quotes in JSON.")
3039
# Add quotes around keys
3140
fixed = re.sub(r'([{,]\s*)(\w+)\s*:', r'\1"\2":', input_str)
3241
# Add quotes around simple string values (skip existing quoted values)
3342
fixed = re.sub(r':\s*([A-Za-z0-9._:@/-]+)(?=\s*[},])', r': "\1"', fixed)
3443
try:
44+
self.logger.debug("Trying to parse fixed JSON string.")
3545
return json.loads(fixed)
3646
except json.JSONDecodeError as e:
47+
self.logger.error(f"Could not fix JSON: {e}")
3748
raise ValueError(f"Could not fix JSON: {e}")
3849

3950
def _load_api_key(self) -> dict:
@@ -44,24 +55,30 @@ def _load_api_key(self) -> dict:
4455
dict: The API key loaded from the JSON file.
4556
"""
4657
try:
58+
self.logger.info(f"Loading API key from file: {self.key_file_path}")
4759
# Read the file as plain text
4860
with open(self.key_file_path, "r") as f:
4961
content = f.read()
5062
# If the content does not contain any double or single quotes, try to fix it
5163
if '"' not in content and "'" not in content:
64+
self.logger.warning("API key file appears to lack quotes, attempting to fix.")
5265
return self._add_quotes_to_json(content)
5366

5467
# Read the file as JSON
5568
with open(self.key_file_path) as json_file:
69+
self.logger.debug("Parsing API key file as JSON.")
5670
return json.load(json_file)
5771
except FileNotFoundError as fnf_err:
72+
self.logger.error(f"File not found: {fnf_err}")
5873
print(f"File not found: {fnf_err}")
5974
raise
6075
except json.JSONDecodeError as json_err:
76+
self.logger.error(f"JSON decode error: {json_err}")
6177
print(f"JSON decode error: {json_err}")
6278
print("Please make sure the file contains valid JSON with quotes and proper formatting.")
6379
raise
6480
except Exception as err:
81+
self.logger.error(f"An unexpected error occurred while loading API key: {err}")
6582
print(f"An unexpected error occurred while loading API key: {err}")
6683
raise
6784

@@ -76,10 +93,11 @@ def _url_from_jwt(self, cred: dict) -> str:
7693
str: The extracted URL.
7794
"""
7895
jwt_token = cred['api_key']
96+
self.logger.debug("Decoding JWT to extract API URL.")
7997
url = jwt.decode(jwt_token, options={"verify_signature": False}).get('iss', '').removesuffix("/user")
98+
self.logger.info(f"Extracted API URL from JWT: {url}")
8099
return url
81100

82-
83101
def authenticate(self) -> dict:
84102
"""
85103
Authenticates with the Gen3 API using the loaded API key.
@@ -88,30 +106,43 @@ def authenticate(self) -> dict:
88106
dict: Headers containing the authorization token.
89107
"""
90108
try:
109+
self.logger.info("Starting authentication process.")
91110
key = self._load_api_key()
92111
api_url = self._url_from_jwt(key)
112+
self.logger.info(f"Sending authentication request to: {api_url}/user/credentials/cdis/access_token")
93113
response = requests.post(
94114
f"{api_url}/user/credentials/cdis/access_token", json=key
95115
)
116+
self.logger.debug(f"Authentication response status code: {response.status_code}")
96117
response.raise_for_status()
97118
access_token = response.json()['access_token']
98119
self.headers = {'Authorization': f"bearer {access_token}"}
99-
return print(f"Authentication successful: {response.status_code}")
120+
self.logger.info(f"Authentication successful. Access token received. Status code: {response.status_code}")
121+
print(f"Authentication successful: {response.status_code}")
100122
except requests.exceptions.HTTPError as http_err:
123+
self.logger.error(
124+
f"HTTP error occurred during authentication: {http_err} - "
125+
f"Status Code: {getattr(http_err.response, 'status_code', 'N/A')}"
126+
)
101127
print(
102128
f"HTTP error occurred during authentication: {http_err} - "
103-
f"Status Code: {response.status_code}"
129+
f"Status Code: {getattr(http_err.response, 'status_code', 'N/A')}"
104130
)
105131
raise
106132
except requests.exceptions.RequestException as req_err:
133+
self.logger.error(f"Request error occurred during authentication: {req_err}")
107134
print(f"Request error occurred during authentication: {req_err}")
108135
raise
109136
except KeyError as key_err:
137+
self.logger.error(
138+
f"Key error: {key_err} - The response may not contain 'access_token'"
139+
)
110140
print(
111141
f"Key error: {key_err} - The response may not contain 'access_token'"
112142
)
113143
raise
114144
except Exception as err:
145+
self.logger.error(f"An unexpected error occurred during authentication: {err}")
115146
print(f"An unexpected error occurred during authentication: {err}")
116147
raise
117148

@@ -125,6 +156,7 @@ def json_to_pd(self, json_data) -> pd.DataFrame:
125156
Returns:
126157
pandas.DataFrame: The converted pandas DataFrame.
127158
"""
159+
self.logger.debug("Converting JSON data to pandas DataFrame.")
128160
return pd.json_normalize(json_data)
129161

130162
def fetch_data(
@@ -146,41 +178,63 @@ def fetch_data(
146178
dict or None: The fetched data if return_data is True, otherwise None.
147179
"""
148180
try:
181+
self.logger.info(
182+
f"Fetching data for program: {program_name}, project: {project_code}, "
183+
f"node: {node_label}, API version: {api_version}"
184+
)
149185
creds = self._load_api_key()
150186
api_url = self._url_from_jwt(creds)
151187
url = (
152188
f"{api_url}/api/{api_version}/submission/{program_name}/{project_code}/"
153189
f"export/?node_label={node_label}&format=json"
154190
)
191+
self.logger.info(f"GET request to URL: {url}")
155192
response = requests.get(url, headers=self.headers)
193+
self.logger.info(f"Fetch data response status code: {response.status_code}")
156194
print(f"status code: {response.status_code}")
157195
response.raise_for_status()
158196
data = response.json()
159197

160198
key = f"{program_name}/{project_code}/{node_label}"
161199
self.data_store[key] = data
200+
self.logger.info(f"Data for {key} has been fetched and stored in data_store.")
162201

163202
if return_data:
203+
self.logger.debug(f"Returning fetched data for {key}.")
164204
return data
165205
else:
206+
self.logger.info(f"Data for {key} has been fetched and stored.")
166207
print(f"Data for {key} has been fetched and stored.")
167208
except requests.exceptions.HTTPError as http_err:
209+
self.logger.error(
210+
f"HTTP error occurred: {http_err} - "
211+
f"Status Code: {getattr(http_err.response, 'status_code', 'N/A')}"
212+
)
168213
print(
169214
f"HTTP error occurred: {http_err} - "
170-
f"Status Code: {response.status_code}"
215+
f"Status Code: {getattr(http_err.response, 'status_code', 'N/A')}"
171216
)
172217
raise
173218
except Exception as err:
219+
self.logger.error(f"An error occurred while fetching data: {err}")
174220
print(f"An error occurred: {err}")
175221
raise
176222

177223
def data_to_pd(self) -> None:
178224
"""
179225
Converts all fetched JSON data in the data store to pandas DataFrames.
180226
"""
227+
self.logger.info("Converting all fetched JSON data in data_store to pandas DataFrames.")
181228
for key, value in self.data_store.items():
229+
self.logger.info(f"Converting {key} to pandas dataframe...")
182230
print(f"Converting {key} to pandas dataframe...")
183-
self.data_store_pd[key] = self.json_to_pd(value['data'])
231+
try:
232+
self.data_store_pd[key] = self.json_to_pd(value['data'])
233+
self.logger.debug(f"Conversion successful for {key}.")
234+
except Exception as e:
235+
self.logger.error(f"Failed to convert {key} to pandas DataFrame: {e}")
236+
print(f"Failed to convert {key} to pandas DataFrame: {e}")
237+
self.logger.info("All available data converted to pandas DataFrames.")
184238
return
185239

186240
def fetch_data_pd(self, program_name, project_code, node_label, api_version="v0"):
@@ -195,8 +249,22 @@ def fetch_data_pd(self, program_name, project_code, node_label, api_version="v0"
195249
api_version (str, optional): The version of the API to use.
196250
Defaults to "v0".
197251
"""
252+
self.logger.info(
253+
f"Fetching data as pandas DataFrame for {program_name}/{project_code}/{node_label} "
254+
f"(API version: {api_version})"
255+
)
198256
data = self.fetch_data(program_name, project_code, node_label, api_version=api_version, return_data=True)
199-
return self.json_to_pd(data['data'])
257+
try:
258+
df = self.json_to_pd(data['data'])
259+
self.logger.info(
260+
f"Successfully converted data to pandas DataFrame for "
261+
f"{program_name}/{project_code}/{node_label}"
262+
)
263+
return df
264+
except Exception as e:
265+
self.logger.error(f"Failed to convert fetched data to pandas DataFrame: {e}")
266+
print(f"Failed to convert fetched data to pandas DataFrame: {e}")
267+
raise
200268

201269
def fetch_data_json(self, program_name, project_code, node_label, api_version="v0"):
202270
"""
@@ -209,4 +277,8 @@ def fetch_data_json(self, program_name, project_code, node_label, api_version="v
209277
api_version (str, optional): The version of the API to use.
210278
Defaults to "v0".
211279
"""
280+
self.logger.info(
281+
f"Fetching data as JSON for {program_name}/{project_code}/{node_label} "
282+
f"(API version: {api_version})"
283+
)
212284
return self.fetch_data(program_name, project_code, node_label, api_version=api_version, return_data=True)

src/gen3_metadata/logger.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
import logging
2+
import os
3+
from datetime import datetime
4+
5+
def setup_logger(name="gen3_metadata", log_file_prefix="gen3_metadata", level=logging.INFO, log_dir="./logs"):
    """
    Set up a logger that writes to a timestamped file in a log directory.

    The log file is named ``<YYYYmmdd_HHMMSS>_<log_file_prefix>.log`` using
    the local time at which the handler is created. Calling this function
    again for the same logger name does not add a second file handler.

    Args:
        name (str): Name of the logger.
        log_file_prefix (str): Text placed after the timestamp in the file name.
        level (int): Logging level applied to the logger.
        log_dir (str): Directory that receives the log file; created if missing.
            Defaults to "./logs".

    Returns:
        logging.Logger: Configured logger instance.
    """
    logger = logging.getLogger(name)
    logger.setLevel(level)

    # Look specifically for a file handler: a logger that already carries some
    # other handler (e.g. a console handler) must still get its log file.
    already_logging_to_file = any(
        isinstance(handler, logging.FileHandler) for handler in logger.handlers
    )
    if not already_logging_to_file:
        os.makedirs(log_dir, exist_ok=True)
        dt_str = datetime.now().strftime("%Y%m%d_%H%M%S")
        log_path = os.path.join(log_dir, f"{dt_str}_{log_file_prefix}.log")

        # Explicit encoding keeps the log readable regardless of the platform locale.
        file_handler = logging.FileHandler(log_path, encoding="utf-8")
        file_handler.setFormatter(
            logging.Formatter('%(asctime)s - %(levelname)s - %(name)s - %(message)s')
        )
        logger.addHandler(file_handler)

    return logger
34+

0 commit comments

Comments
 (0)