Skip to content

Commit 5d03d91

Browse files
authored
fix: resolve all remaining parsing issues (#21)
1 parent 220e5e9 commit 5d03d91

File tree

3 files changed

+30799
-7
lines changed

3 files changed

+30799
-7
lines changed

src/regbot/fetch/clinical_trials.py

Lines changed: 52 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -565,7 +565,10 @@ class StandardAge(StrEnum):
565565

566566

567567
# these are obviously imprecise, to varying degrees, but it's what we have to work with
568-
_SECONDS_IN_DAY = 24 * 60 * 60
568+
_SECONDS_IN_MINUTE = 60
569+
_SECONDS_IN_HOUR = 60 * _SECONDS_IN_MINUTE
570+
_SECONDS_IN_DAY = 24 * _SECONDS_IN_HOUR
571+
_SECONDS_IN_WEEK = 7 * _SECONDS_IN_DAY
569572
_SECONDS_IN_MONTH = 31 * _SECONDS_IN_DAY
570573
_SECONDS_IN_YEAR = 365 * _SECONDS_IN_DAY
571574

@@ -581,8 +584,14 @@ def _age_to_timedelta(raw_age: str) -> datetime.timedelta:
581584
factor = _SECONDS_IN_YEAR
582585
elif "Month" in raw_age:
583586
factor = _SECONDS_IN_MONTH
587+
elif "Week" in raw_age:
588+
factor = _SECONDS_IN_WEEK
584589
elif "Day" in raw_age:
585590
factor = _SECONDS_IN_DAY
591+
elif "Hour" in raw_age:
592+
factor = _SECONDS_IN_HOUR
593+
elif "Minute" in raw_age:
594+
factor = _SECONDS_IN_MINUTE
586595
else:
587596
msg = f"Unable to parse '{raw_age}' as a duration"
588597
raise ValueError(msg)
@@ -879,11 +888,37 @@ def _format_study(study_input: dict) -> Study:
879888
)
880889

881890

882-
def make_fda_clinical_trials_request(url: str) -> list[Study]:
891+
def _get_id(study_response: dict, url: str, i: int) -> str | None:
892+
"""Extract NCT ID from study response
893+
894+
:param study_response: a single study response object
895+
:param url: URL used to issue request
896+
:param i: index of individual study within response to that URL
897+
:return: NCT ID if available (should be, but we're being careful)
898+
"""
899+
study_id = (
900+
study_response.get("protocolSection", {})
901+
.get("identificationModule", {})
902+
.get("nctId")
903+
)
904+
if not study_id:
905+
_logger.error(
906+
"Unable to fetch study ID for the %s 'th entry in %s. Is the JSON object malformed?",
907+
i,
908+
url,
909+
)
910+
return study_id
911+
912+
913+
def make_fda_clinical_trials_request(
914+
url: str, skip_parsing_failures: bool
915+
) -> list[Study]:
883916
"""Issue a request against provided URL for FDA Clinical Trials API
884917
885918
:param url: URL to request. This method doesn't add any additional parameters except
886919
for pagination.
920+
:param skip_parsing_failures: if ``True``, catch and suppress failures to parse
921+
study metadata
887922
:return: studies contained in API response
888923
"""
889924
results = []
@@ -899,17 +934,26 @@ def make_fda_clinical_trials_request(url: str) -> list[Study]:
899934
)
900935
raise e
901936
raw_data = r.json()
902-
results.extend(
903-
_format_study(study) for study in raw_data.get("studies", [])
904-
)
937+
for i, study in enumerate(raw_data.get("studies", [])):
938+
try:
939+
parsed_data = _format_study(study)
940+
except ValueError as e:
941+
if skip_parsing_failures:
942+
nct_id = _get_id(study, formatted_url, i)
943+
_logger.warning("Failed to parse study %s: %s", nct_id, e)
944+
continue
945+
raise e
946+
results.append(parsed_data)
905947

906948
next_page_token = raw_data.get("nextPageToken")
907949
if not next_page_token:
908950
break
909951
return results
910952

911953

912-
def get_clinical_trials(drug_name: str | None = None) -> list[Study]:
954+
def get_clinical_trials(
955+
drug_name: str | None = None, skip_parsing_failures: bool = False
956+
) -> list[Study]:
913957
"""Get data from the FDA Clinical Trials API.
914958
915959
>>> results = get_clinical_trials("imatinib")
@@ -919,6 +963,7 @@ def get_clinical_trials(drug_name: str | None = None) -> list[Study]:
919963
:param drug_name: name of drug used for trial intervention. This is passed to the
920964
API intervention parameter, which appears to search for inclusion as a substring
921965
rather than a full-span match
966+
:param skip_parsing_failures: if ``True``, log and skip studies whose metadata fails to parse instead of raising
922967
:return: list of matching trial descriptions
923968
"""
924969
if not drug_name:
@@ -928,4 +973,4 @@ def get_clinical_trials(drug_name: str | None = None) -> list[Study]:
928973
if drug_name:
929974
params.append(f"query.intr={drug_name}")
930975
url = f"https://clinicaltrials.gov/api/v2/studies?{'&'.join(params)}"
931-
return make_fda_clinical_trials_request(url)
976+
return make_fda_clinical_trials_request(url, skip_parsing_failures)

0 commit comments

Comments
 (0)