|
| 1 | +import os |
| 2 | +from datetime import datetime |
| 3 | +from enum import Enum |
| 4 | +from pathlib import Path |
| 5 | +from typing import Optional, Union |
| 6 | + |
| 7 | +import pandas as pd |
| 8 | +from pandas import DataFrame |
| 9 | +from pandas.core.groupby.generic import DataFrameGroupBy |
| 10 | + |
# Absolute path reached by going up three components from this file's own path,
# i.e. two directories above the directory that contains this file.
ROOT_DIR = os.path.abspath(os.path.join(__file__, os.pardir, os.pardir, os.pardir))
| 12 | + |
| 13 | + |
class DateTimePattern(Enum):
    """strftime format strings used when rendering date times as text."""

    # ISO-like pattern with seconds and a literal trailing "Z".
    UTC_TIME_PATTERN_EXTENDED = "%Y-%m-%dT%H:%M:%SZ"
    # ISO-like pattern with minute resolution and a literal trailing "Z".
    UTC_TIME_PATTERN = "%Y-%m-%dT%H:%MZ"
    # Space separated date and time with seconds, no zone marker.
    PLAIN = "%Y-%m-%d %H:%M:%S"
| 18 | + |
| 19 | + |
def get_absolute_path_from_project_root(path: str | Path) -> Path:
    """
    Returns the given path anchored at the project root directory (ROOT_DIR).

    Relative paths are appended to ROOT_DIR. Absolute paths (which includes
    every path that already starts with ROOT_DIR) are returned unchanged in
    location. The result is always a pathlib.Path, regardless of which case
    applies.

    Args:
        path: The input path, relative to the project root or absolute.
            Non-string inputs are converted with str() first.

    Returns:
        The resulting path as a pathlib.Path object.
    """
    if not isinstance(path, str):
        path = str(path)
    # joinpath() discards the base when the argument is absolute, so paths that
    # already live under ROOT_DIR (or anywhere else absolute) pass through as-is.
    return Path(ROOT_DIR).joinpath(path)
| 27 | + |
| 28 | + |
| 29 | +def get_absolute_path_from_working_dir(path: str | Path) -> Path: |
| 30 | + """ |
| 31 | + Given a path (as string or pathlib.Path), returns its absolute path based on |
| 32 | + the current working directory. If the path is already absolute, it's returned unchanged. |
| 33 | +
|
| 34 | + Args: |
| 35 | + - path (Union[str, Path]): The input path. |
| 36 | +
|
| 37 | + Returns: |
| 38 | + - Path: The absolute path as a pathlib.Path object. |
| 39 | + """ |
| 40 | + path_obj = Path(path) |
| 41 | + if path_obj.is_absolute(): |
| 42 | + return path_obj |
| 43 | + return path_obj.resolve() |
| 44 | + |
| 45 | + |
| 46 | +def get_file_path(path: str | Path, file_name: str): |
| 47 | + return Path(path).resolve().joinpath(file_name) |
| 48 | + |
| 49 | + |
def read_csv(
    path: str | Path,
    file_name: str,
    delimiter: str | None = None,
    index_col: Optional[str] = None,
) -> DataFrame:
    """
    Reads a csv file from a directory into a DataFrame.

    The file is read the same way whether or not an index column is requested.
    Compression is left to pandas' default inference from the file extension,
    so plain csv files can be read with an index column as well.

    Args:
        path: Directory containing the file.
        file_name: Name of the csv file.
        delimiter: The csv delimiter, passed through to pandas.
        index_col: Name of the column to use as index. Falsy values mean
            that no index column is set.

    Returns:
        The content of the file as a DataFrame.

    Raises:
        FileNotFoundError: If the file does not exist. This is a subclass of
            IOError / OSError, so handlers for those keep working.
    """
    full_path = get_file_path(path, file_name)
    if not full_path.exists():
        raise FileNotFoundError(
            "File with path: " + str(full_path) + " does not exist"
        )
    return pd.read_csv(
        full_path,
        delimiter=delimiter,
        quotechar='"',
        # Falsy values (None, "") select pandas' default of no index column.
        index_col=index_col or None,
    )
| 65 | + |
| 66 | + |
def to_date_time(zoned_date_time: str) -> datetime:
    """
    Converts zoned date time string with format: "yyyy-MM-dd'T'HH:mm[:ss[.S[S][S]]]'Z'"
    e.g. '2022-02-01T00:15Z[UTC]' or '2022-02-01T00:15:30.250Z' to python datetime.

    The fields are read from fixed positions in the string. Seconds and
    fractional seconds are optional and default to zero. Any zone suffix is
    ignored, so the result of the positional parse is a naive datetime.

    Args:
        zoned_date_time: The zoned date time string to convert.

    Returns:
        The converted datetime object. If the fixed-position fields are not
        numeric, the result of pd.to_datetime for the string is returned instead.

    Raises:
        ValueError: If the input is empty or not a string, or if the parsed
            fields do not form a valid date time.
    """
    if not zoned_date_time or not isinstance(zoned_date_time, str):
        raise ValueError(f"Unexpected date time string: {zoned_date_time}")
    try:
        year = int(zoned_date_time[0:4])
        month = int(zoned_date_time[5:7])
        day = int(zoned_date_time[8:10])
        hour = int(zoned_date_time[11:13])
        minute = int(zoned_date_time[14:16])
        second = 0
        microsecond = 0
        # Seconds are only present when the minutes are followed by a colon.
        if zoned_date_time[16:17] == ":":
            second = int(zoned_date_time[17:19])
            if zoned_date_time[19:20] == ".":
                # Collect up to six fractional digits (microsecond resolution).
                fraction = ""
                for char in zoned_date_time[20:26]:
                    if char not in "0123456789":
                        break
                    fraction += char
                if fraction:
                    microsecond = int(fraction.ljust(6, "0"))
    except ValueError:
        # int() on a string slice only raises ValueError; let pandas try instead.
        return pd.to_datetime(zoned_date_time)
    return datetime(
        year=year,
        month=month,
        day=day,
        hour=hour,
        minute=minute,
        second=second,
        microsecond=microsecond,
    )
| 89 | + |
| 90 | + |
def csv_to_grpd_df(
    file_name: str, simulation_data_path: str, delimiter: str | None = None
) -> DataFrameGroupBy:
    """
    Loads a csv results file, removes its "uuid" column if there is one and
    groups the remaining rows by the "input_model" column.

    Args:
        file_name: name of the file to read
        simulation_data_path: base directory of the result data
        delimiter: the csv delimiter

    Returns:
        DataFrameGroupBy object of the file, grouped by "input_model"
    """
    frame = read_csv(simulation_data_path, file_name, delimiter)
    has_uuid = "uuid" in frame.columns
    cleaned = frame.drop(columns=["uuid"]) if has_uuid else frame
    return cleaned.groupby(by="input_model")
| 110 | + |
| 111 | + |
def check_filter(filter_start: Optional[datetime], filter_end: Optional[datetime]):
    """
    Validates a start/end filter pair.

    Args:
        filter_start: Start of the filter interval, or None for no filter.
        filter_end: End of the filter interval, or None for no filter.

    Raises:
        ValueError: If only one of the two bounds is given, or if the start
            lies after the end.
    """
    has_start = bool(filter_start)
    has_end = bool(filter_end)
    if has_start != has_end:
        raise ValueError(
            "Both start and end of the filter must be provided if one is provided."
        )
    # Past the check above both flags agree, so testing one of them suffices.
    if has_start and filter_start > filter_end:
        raise ValueError("Filter start must be before end.")
| 119 | + |
| 120 | + |
def df_to_csv(
    df: DataFrame,
    path: Union[str, Path],
    file_name: str,
    mkdirs=False,
    delimiter: str = ",",
    index_label="uuid",
    datetime_pattern=DateTimePattern.UTC_TIME_PATTERN,
):
    """
    Writes a DataFrame to a csv file, always including its index.

    The work happens on a deep copy, so the passed DataFrame is not modified.
    Before writing, boolean values are rendered as "true"/"false" and date
    times (index and columns) are formatted with the given pattern.

    Args:
        df: The data to write.
        path: Directory the file is written to.
        file_name: Name of the csv file.
        mkdirs: Whether to create the target directory if it is missing.
        delimiter: The csv delimiter.
        index_label: Column label used for the index in the file.
        datetime_pattern: DateTimePattern member whose value is the strftime
            format applied to date times.
    """
    frame = df.copy(deep=True)
    directory = str(path) if isinstance(path, Path) else path
    target = get_file_path(directory, file_name)
    if mkdirs:
        os.makedirs(target.parent, exist_ok=True)

    def _holds_only_bools(series) -> bool:
        # Missing values are ignored; a column without any value also qualifies.
        return series.dropna().apply(lambda value: isinstance(value, bool)).all()

    bool_cols = [col for col in frame.columns if _holds_only_bools(frame[col])]

    # replace True with 'true' only in boolean columns
    frame[bool_cols] = frame[bool_cols].replace({True: "true", False: "false"})

    if isinstance(frame.index, pd.DatetimeIndex):
        frame.index = frame.index.strftime(datetime_pattern.value)

    datetime_cols = frame.select_dtypes(
        include=["datetime64[ns, UTC]", "datetime64"]
    ).columns
    for col in datetime_cols:
        # Null entries are kept untouched, everything else becomes a string.
        frame[col] = frame[col].apply(
            lambda x: x if pd.isnull(x) else x.strftime(datetime_pattern.value)
        )

    frame.to_csv(target, index=True, index_label=index_label, sep=delimiter)
| 158 | + |
| 159 | + |
def bool_converter(maybe_bool):
    """
    Converts a value to a bool.

    Args:
        maybe_bool: Either a bool, which is returned as is, or one of the
            strings "true" / "false" in any letter case.

    Returns:
        The corresponding bool.

    Raises:
        ValueError: If the value is neither a bool nor one of the accepted strings.
    """
    if isinstance(maybe_bool, bool):
        return maybe_bool
    if isinstance(maybe_bool, str):
        lowered = maybe_bool.lower()
        if lowered in ("true", "false"):
            return lowered == "true"
    raise ValueError("Cannot convert to bool: " + str(maybe_bool))
0 commit comments