From 9472cbfd0fd4eff41006875280ff140df8bb838e Mon Sep 17 00:00:00 2001 From: juacrumar Date: Sun, 25 Jan 2026 21:05:34 +0100 Subject: [PATCH 1/2] update the code to be compatible with pandas==3 --- .../actions/prepare_environment/action.yml | 2 +- .github/workflows/redo_regressions.yml | 1 - .../commondata/HERA_CC_318GEV/filter.py | 111 +++++++++--------- .../commondata/HERA_NC_225GEV/filter.py | 108 ++++++++--------- .../commondata/HERA_NC_251GEV/filter.py | 98 ++++++++-------- .../commondata/HERA_NC_300GEV/filter.py | 98 ++++++++-------- .../commondata/HERA_NC_318GEV/filter.py | 108 +++++++++-------- .../commondata/HERA_NC_318GEV_EAVG/filter.py | 108 +++++++++-------- nnpdf_data/nnpdf_data/commondataparser.py | 2 +- nnpdf_data/pyproject.toml | 2 +- pyproject.toml | 2 +- validphys2/src/validphys/covmats.py | 5 +- validphys2/src/validphys/fitdata.py | 7 +- validphys2/src/validphys/hyperoptplot.py | 22 +--- validphys2/src/validphys/pseudodata.py | 4 +- validphys2/src/validphys/results.py | 3 +- validphys2/src/validphys/tableloader.py | 21 ++-- .../src/validphys/tests/test_weights.py | 9 +- .../src/validphys/theorycovariance/output.py | 13 +- 19 files changed, 358 insertions(+), 366 deletions(-) diff --git a/.github/actions/prepare_environment/action.yml b/.github/actions/prepare_environment/action.yml index 8e1767f6c9..d435249270 100644 --- a/.github/actions/prepare_environment/action.yml +++ b/.github/actions/prepare_environment/action.yml @@ -32,7 +32,7 @@ runs: with: python-version: ${{ inputs.python-version }} use-mamba: true - channels: https://packages.nnpdf.science/public,conda-forge + channels: conda-forge show-channel-urls: true auto-update-conda: true activate-environment: nnpdf_environment diff --git a/.github/workflows/redo_regressions.yml b/.github/workflows/redo_regressions.yml index 34fa6a319f..b67e9a105b 100644 --- a/.github/workflows/redo_regressions.yml +++ b/.github/workflows/redo_regressions.yml @@ -34,7 +34,6 @@ jobs: echo "$NETRC_FILE" | base64 --decode > ~/.netrc conda config --remove channels defaults conda config --append channels conda-forge - conda config --prepend channels https://packages.nnpdf.science/public conda config --set show_channel_urls true conda install lhapdf pandoc - name: Install nnpdf with testing and qed extras diff --git a/nnpdf_data/nnpdf_data/commondata/HERA_CC_318GEV/filter.py b/nnpdf_data/nnpdf_data/commondata/HERA_CC_318GEV/filter.py index 166cfd9dc6..8c77ef9aba 100644 --- a/nnpdf_data/nnpdf_data/commondata/HERA_CC_318GEV/filter.py +++ b/nnpdf_data/nnpdf_data/commondata/HERA_CC_318GEV/filter.py @@ -1,68 +1,69 @@ -from nnpdf_data.filter_utils.hera_utils import commondata #, covmat_is_close -from pathlib import Path from dataclasses import dataclass -import typing -from typing import List -import numpy as np -import pandas as pd from os import PathLike -import yaml +from pathlib import Path + +import pandas as pd + +from nnpdf_data.filter_utils.hera_utils import commondata # , covmat_is_close + @dataclass class hera_commondata(commondata): - def __init__(self, filename: str | PathLike, dataset_name: str, - process: str): - # Read the data. - file = Path(filename) - df = pd.read_table(file, sep=r"\s+") + def __init__(self, filename: str | PathLike, dataset_name: str, process: str): + # Read the data. + file = Path(filename) + df = pd.read_table(file, sep=r"\s+") - # Kinematic quantieties. 
-      self.central_values = df["Sigma"].to_numpy()
-      self.kinematics = df[["x", "Q2", "y"]].to_numpy()
-      self.kinematic_quantities = ["x", "Q2", "y"]
+        # Kinematic quantities.
+        self.central_values = df["Sigma"].to_numpy()
+        self.kinematics = df[["x", "Q2", "y"]].to_numpy()
+        self.kinematic_quantities = ["x", "Q2", "y"]
 
-      # Statistical uncertainties.
-      statistical_uncertainties = df["stat"].to_numpy()
-      for iunc,unc in enumerate(statistical_uncertainties):
-         unc = self.central_values[iunc]*unc/100
-         statistical_uncertainties[iunc] = unc
-      self.statistical_uncertainties = statistical_uncertainties
+        # Statistical uncertainties.
+        statistical_uncertainties = df["stat"].to_numpy(copy=True)
+        for iunc, unc in enumerate(statistical_uncertainties):
+            unc = self.central_values[iunc] * unc / 100
+            statistical_uncertainties[iunc] = unc
+        self.statistical_uncertainties = statistical_uncertainties
 
-      # Systematic uncertainties.
-      # remove the column containing the total uncertainty excluding
-      # procedural uncertainties.
-      df = df.drop(columns=["tot_noproc"])
-      sys_uncert_col_names = list(df.columns.values)[5:]
-      self.systematic_uncertainties = df[sys_uncert_col_names].to_numpy()
-      systematic_uncertainties = df[sys_uncert_col_names].to_numpy()
-      for iunc,unc in enumerate(systematic_uncertainties):
-         unc = self.central_values[iunc]*unc/100
-         systematic_uncertainties[iunc] = unc
-      self.systematic_uncertainties = systematic_uncertainties
+        # Systematic uncertainties.
+        # remove the column containing the total uncertainty excluding
+        # procedural uncertainties.
+        df = df.drop(columns=["tot_noproc"])
+        sys_uncert_col_names = list(df.columns.values)[5:]
+        self.systematic_uncertainties = df[sys_uncert_col_names].to_numpy()
+        systematic_uncertainties = df[sys_uncert_col_names].to_numpy()
+        for iunc, unc in enumerate(systematic_uncertainties):
+            unc = self.central_values[iunc] * unc / 100
+            systematic_uncertainties[iunc] = unc
+        self.systematic_uncertainties = systematic_uncertainties
 
-      # All uncertainties are treated as multiplicative.
-      systypes = []
-      for name in sys_uncert_col_names:
-         if(name == "uncor"):
-            systypes.append(("MULT", "UNCORR"))
-         else:
-            systypes.append(("MULT", f"HC_{name}"))
-      self.systypes = systypes
-      self.process = process
-      self.dataset_name = dataset_name
-
-def main():
-   hera_em = hera_commondata("./rawdata/HERA1+2_CCem.dat","HERACOMBCCEM", "DIS_CC")
-   hera_em.write_new_commondata(Path("data_EM-SIGMARED.yaml"),
-                   Path("kinematics_EM-SIGMARED.yaml"),
-                   Path("uncertainties_EM-SIGMARED.yaml"))
-   hera_ep = hera_commondata("./rawdata/HERA1+2_CCep.dat","HERACOMBCCEP", "DIS_CC")
-   hera_ep.write_new_commondata(Path("data_EP-SIGMARED.yaml"),
-                   Path("kinematics_EP-SIGMARED.yaml"),
-                   Path("uncertainties_EP-SIGMARED.yaml"))
+
+        # All uncertainties are treated as multiplicative.
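+        # ("uncor" is the only point-to-point uncorrelated column; every other
+        # source is kept correlated across bins under its HC_<name> label)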
+        systypes = []
+        for name in sys_uncert_col_names:
+            if name == "uncor":
+                systypes.append(("MULT", "UNCORR"))
+            else:
+                systypes.append(("MULT", f"HC_{name}"))
+        self.systypes = systypes
+        self.process = process
+        self.dataset_name = dataset_name
 
-if __name__ == "__main__":
-   main()
+def main():
+    hera_em = hera_commondata("./rawdata/HERA1+2_CCem.dat", "HERACOMBCCEM", "DIS_CC")
+    hera_em.write_new_commondata(
+        Path("data_EM-SIGMARED.yaml"),
+        Path("kinematics_EM-SIGMARED.yaml"),
+        Path("uncertainties_EM-SIGMARED.yaml"),
+    )
+    hera_ep = hera_commondata("./rawdata/HERA1+2_CCep.dat", "HERACOMBCCEP", "DIS_CC")
+    hera_ep.write_new_commondata(
+        Path("data_EP-SIGMARED.yaml"),
+        Path("kinematics_EP-SIGMARED.yaml"),
+        Path("uncertainties_EP-SIGMARED.yaml"),
+    )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/nnpdf_data/nnpdf_data/commondata/HERA_NC_225GEV/filter.py b/nnpdf_data/nnpdf_data/commondata/HERA_NC_225GEV/filter.py
index bce0dc1c93..0b4b5f9aee 100644
--- a/nnpdf_data/nnpdf_data/commondata/HERA_NC_225GEV/filter.py
+++ b/nnpdf_data/nnpdf_data/commondata/HERA_NC_225GEV/filter.py
@@ -1,63 +1,63 @@
-from nnpdf_data.filter_utils.hera_utils import commondata #, covmat_is_close
-from pathlib import Path
 from dataclasses import dataclass
-import typing
-from typing import List
-import numpy as np
-import pandas as pd
 from os import PathLike
-import yaml
+from pathlib import Path
+
+import pandas as pd
+
+from nnpdf_data.filter_utils.hera_utils import commondata  # , covmat_is_close
+
 
 @dataclass
 class hera_commondata(commondata):
-   def __init__(self, filename: str | PathLike, dataset_name: str,
-         process: str):
-      # Read the data.
-      file = Path(filename)
-      df = pd.read_table(file, sep=r"\s+")
-
-      # Kinematic quantieties.
-      self.central_values = df["Sigma"].to_numpy()
-      self.kinematics = df[["x", "Q2", "y"]].to_numpy()
-      self.kinematic_quantities = ["x", "Q2", "y"]
-
-      # Statistical uncertainties.
-      statistical_uncertainties = df["stat"].to_numpy()
-      for iunc,unc in enumerate(statistical_uncertainties):
-         unc = self.central_values[iunc]*unc/100
-         statistical_uncertainties[iunc] = unc
-      self.statistical_uncertainties = statistical_uncertainties
-
-      # Systematic uncertainties.
-      # remove the column containing the total uncertainty excluding
-      # procedural uncertainties.
-      df = df.drop(columns=["tot_noproc"])
-      sys_uncert_col_names = list(df.columns.values)[5:]
-      self.systematic_uncertainties = df[sys_uncert_col_names].to_numpy()
-      systematic_uncertainties = df[sys_uncert_col_names].to_numpy()
-      for iunc,unc in enumerate(systematic_uncertainties):
-         unc = self.central_values[iunc]*unc/100
-         systematic_uncertainties[iunc] = unc
-      self.systematic_uncertainties = systematic_uncertainties
-
-      # All uncertainties are treated as multiplicative.
-      systypes = []
-      for name in sys_uncert_col_names:
-         if(name == "uncor"):
-            systypes.append(("MULT", "UNCORR"))
-         else:
-            systypes.append(("MULT", "HC_" + name))
-      self.systypes = systypes
-      self.process = process
-      self.dataset_name = dataset_name
+    def __init__(self, filename: str | PathLike, dataset_name: str, process: str):
+        # Read the data.
+        file = Path(filename)
+        df = pd.read_table(file, sep=r"\s+")
+
+        # Kinematic quantities.
+        self.central_values = df["Sigma"].to_numpy()
+        self.kinematics = df[["x", "Q2", "y"]].to_numpy()
+        self.kinematic_quantities = ["x", "Q2", "y"]
+
+        # Statistical uncertainties.
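+        # (under copy-on-write, the pandas>=3 default, to_numpy() may return a
+        # read-only view, so an explicit copy is taken before scaling in place)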
+        statistical_uncertainties = df["stat"].to_numpy(copy=True)
+        for iunc, unc in enumerate(statistical_uncertainties):
+            unc = self.central_values[iunc] * unc / 100
+            statistical_uncertainties[iunc] = unc
+        self.statistical_uncertainties = statistical_uncertainties
+
+        # Systematic uncertainties.
+        # remove the column containing the total uncertainty excluding
+        # procedural uncertainties.
+        df = df.drop(columns=["tot_noproc"])
+        sys_uncert_col_names = list(df.columns.values)[5:]
+        self.systematic_uncertainties = df[sys_uncert_col_names].to_numpy()
+        systematic_uncertainties = df[sys_uncert_col_names].to_numpy()
+        for iunc, unc in enumerate(systematic_uncertainties):
+            unc = self.central_values[iunc] * unc / 100
+            systematic_uncertainties[iunc] = unc
+        self.systematic_uncertainties = systematic_uncertainties
+
+        # All uncertainties are treated as multiplicative.
+        systypes = []
+        for name in sys_uncert_col_names:
+            if name == "uncor":
+                systypes.append(("MULT", "UNCORR"))
+            else:
+                systypes.append(("MULT", "HC_" + name))
+        self.systypes = systypes
+        self.process = process
+        self.dataset_name = dataset_name
 
-def main():
-   hera_ep = hera_commondata("./rawdata/HERA1+2_NCep_460.dat","HERACOMBNCEP460", "DIS_NCE")
-   hera_ep.write_new_commondata(Path("data_EP-SIGMARED.yaml"),
-                   Path("kinematics_EP-SIGMARED.yaml"),
-                   Path("uncertainties_EP-SIGMARED.yaml"))
-if __name__ == "__main__":
-   main()
+def main():
+    hera_ep = hera_commondata("./rawdata/HERA1+2_NCep_460.dat", "HERACOMBNCEP460", "DIS_NCE")
+    hera_ep.write_new_commondata(
+        Path("data_EP-SIGMARED.yaml"),
+        Path("kinematics_EP-SIGMARED.yaml"),
+        Path("uncertainties_EP-SIGMARED.yaml"),
+    )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/nnpdf_data/nnpdf_data/commondata/HERA_NC_251GEV/filter.py b/nnpdf_data/nnpdf_data/commondata/HERA_NC_251GEV/filter.py
index d634ad2d03..52d8be1ca6 100644
--- a/nnpdf_data/nnpdf_data/commondata/HERA_NC_251GEV/filter.py
+++ b/nnpdf_data/nnpdf_data/commondata/HERA_NC_251GEV/filter.py
@@ -1,65 +1,67 @@
-from nnpdf_data.filter_utils.hera_utils import commondata #, covmat_is_close
-from pathlib import Path
 from dataclasses import dataclass
+from os import PathLike
+from pathlib import Path
 import typing
 from typing import List
+
 import numpy as np
 import pandas as pd
-from os import PathLike
 import yaml
 
+from nnpdf_data.filter_utils.hera_utils import commondata  # , covmat_is_close
+
+
 @dataclass
 class hera_commondata(commondata):
-   def __init__(self, filename: str | PathLike, dataset_name: str,
-         process: str):
-      # Read the data.
-      file = Path(filename)
-      df = pd.read_table(file, sep=r"\s+")
-
-      # Kinematic quantieties.
-      self.central_values = df["Sigma"].to_numpy()
-      self.kinematics = df[["x", "Q2", "y"]].to_numpy()
-      self.kinematic_quantities = ["x", "Q2", "y"]
+    def __init__(self, filename: str | PathLike, dataset_name: str, process: str):
+        # Read the data.
+        file = Path(filename)
+        df = pd.read_table(file, sep=r"\s+")
 
-      # Statistical uncertainties.
-      statistical_uncertainties = df["stat"].to_numpy()
-      for iunc,unc in enumerate(statistical_uncertainties):
-         unc = self.central_values[iunc]*unc/100
-         statistical_uncertainties[iunc] = unc
-      self.statistical_uncertainties = statistical_uncertainties
+        # Kinematic quantities.
+        self.central_values = df["Sigma"].to_numpy()
+        self.kinematics = df[["x", "Q2", "y"]].to_numpy()
+        self.kinematic_quantities = ["x", "Q2", "y"]
 
-      # Systematic uncertainties.
-      # remove the column containing the total uncertainty excluding
-      # procedural uncertainties.
- df = df.drop(columns=["tot_noproc"]) - sys_uncert_col_names = list(df.columns.values)[5:] - self.systematic_uncertainties = df[sys_uncert_col_names].to_numpy() - systematic_uncertainties = df[sys_uncert_col_names].to_numpy() - for iunc,unc in enumerate(systematic_uncertainties): - unc = self.central_values[iunc]*unc/100 - systematic_uncertainties[iunc] = unc - self.systematic_uncertainties = systematic_uncertainties - - # All uncertainties are treated as multiplicative. - systypes = [] - for name in sys_uncert_col_names: - if(name == "uncor"): - systypes.append(("MULT", "UNCORR")) - else: - systypes.append(("MULT", "HC_" + name)) - self.systypes = systypes - self.process = process - self.dataset_name = dataset_name + # Statistical uncertainties. + statistical_uncertainties = df["stat"].to_numpy(copy=True) + for iunc, unc in enumerate(statistical_uncertainties): + unc = self.central_values[iunc] * unc / 100 + statistical_uncertainties[iunc] = unc + self.statistical_uncertainties = statistical_uncertainties -def main(): - hera_ep = hera_commondata("./rawdata/HERA1+2_NCep_575.dat","HERACOMBNCEP575", "DIS_NCE") - hera_ep.write_new_commondata(Path("data_EP-SIGMARED.yaml"), - Path("kinematics_EP-SIGMARED.yaml"), - Path("uncertainties_EP-SIGMARED.yaml")) + # Systematic uncertainties. + # remove the column containing the total uncertainty excluding + # procedural uncertainties. + df = df.drop(columns=["tot_noproc"]) + sys_uncert_col_names = list(df.columns.values)[5:] + self.systematic_uncertainties = df[sys_uncert_col_names].to_numpy() + systematic_uncertainties = df[sys_uncert_col_names].to_numpy() + for iunc, unc in enumerate(systematic_uncertainties): + unc = self.central_values[iunc] * unc / 100 + systematic_uncertainties[iunc] = unc + self.systematic_uncertainties = systematic_uncertainties + # All uncertainties are treated as multiplicative. + systypes = [] + for name in sys_uncert_col_names: + if name == "uncor": + systypes.append(("MULT", "UNCORR")) + else: + systypes.append(("MULT", "HC_" + name)) + self.systypes = systypes + self.process = process + self.dataset_name = dataset_name -if __name__ == "__main__": - main() +def main(): + hera_ep = hera_commondata("./rawdata/HERA1+2_NCep_575.dat", "HERACOMBNCEP575", "DIS_NCE") + hera_ep.write_new_commondata( + Path("data_EP-SIGMARED.yaml"), + Path("kinematics_EP-SIGMARED.yaml"), + Path("uncertainties_EP-SIGMARED.yaml"), + ) +if __name__ == "__main__": + main() diff --git a/nnpdf_data/nnpdf_data/commondata/HERA_NC_300GEV/filter.py b/nnpdf_data/nnpdf_data/commondata/HERA_NC_300GEV/filter.py index 968265df82..b0293a1eab 100644 --- a/nnpdf_data/nnpdf_data/commondata/HERA_NC_300GEV/filter.py +++ b/nnpdf_data/nnpdf_data/commondata/HERA_NC_300GEV/filter.py @@ -1,65 +1,67 @@ -from nnpdf_data.filter_utils.hera_utils import commondata #, covmat_is_close -from pathlib import Path from dataclasses import dataclass +from os import PathLike +from pathlib import Path import typing from typing import List + import numpy as np import pandas as pd -from os import PathLike import yaml +from nnpdf_data.filter_utils.hera_utils import commondata # , covmat_is_close + + @dataclass class hera_commondata(commondata): - def __init__(self, filename: str | PathLike, dataset_name: str, - process: str): - # Read the data. - file = Path(filename) - df = pd.read_table(file, sep=r"\s+") - - # Kinematic quantieties. 
-      self.central_values = df["Sigma"].to_numpy()
-      self.kinematics = df[["x", "Q2", "y"]].to_numpy()
-      self.kinematic_quantities = ["x", "Q2", "y"]
+    def __init__(self, filename: str | PathLike, dataset_name: str, process: str):
+        # Read the data.
+        file = Path(filename)
+        df = pd.read_table(file, sep=r"\s+")
 
-      # Statistical uncertainties.
-      statistical_uncertainties = df["stat"].to_numpy()
-      for iunc,unc in enumerate(statistical_uncertainties):
-         unc = self.central_values[iunc]*unc/100
-         statistical_uncertainties[iunc] = unc
-      self.statistical_uncertainties = statistical_uncertainties
+        # Kinematic quantities.
+        self.central_values = df["Sigma"].to_numpy()
+        self.kinematics = df[["x", "Q2", "y"]].to_numpy()
+        self.kinematic_quantities = ["x", "Q2", "y"]
 
-      # Systematic uncertainties.
-      # remove the column containing the total uncertainty excluding
-      # procedural uncertainties.
-      df = df.drop(columns=["tot_noproc"])
-      sys_uncert_col_names = list(df.columns.values)[5:]
-      self.systematic_uncertainties = df[sys_uncert_col_names].to_numpy()
-      systematic_uncertainties = df[sys_uncert_col_names].to_numpy()
-      for iunc,unc in enumerate(systematic_uncertainties):
-         unc = self.central_values[iunc]*unc/100
-         systematic_uncertainties[iunc] = unc
-      self.systematic_uncertainties = systematic_uncertainties
+        # Statistical uncertainties.
+        statistical_uncertainties = df["stat"].to_numpy(copy=True)
+        for iunc, unc in enumerate(statistical_uncertainties):
+            unc = self.central_values[iunc] * unc / 100
+            statistical_uncertainties[iunc] = unc
+        self.statistical_uncertainties = statistical_uncertainties
 
-      # All uncertainties are treated as multiplicative.
-      systypes = []
-      for name in sys_uncert_col_names:
-         if(name == "uncor"):
-            systypes.append(("MULT", "UNCORR"))
-         else:
-            systypes.append(("MULT", "HC_" + name))
-      self.systypes = systypes
-      self.process = process
-      self.dataset_name = dataset_name
+        # Systematic uncertainties.
+        # remove the column containing the total uncertainty excluding
+        # procedural uncertainties.
+        df = df.drop(columns=["tot_noproc"])
+        sys_uncert_col_names = list(df.columns.values)[5:]
+        self.systematic_uncertainties = df[sys_uncert_col_names].to_numpy()
+        systematic_uncertainties = df[sys_uncert_col_names].to_numpy()
+        for iunc, unc in enumerate(systematic_uncertainties):
+            unc = self.central_values[iunc] * unc / 100
+            systematic_uncertainties[iunc] = unc
+        self.systematic_uncertainties = systematic_uncertainties
 
-def main():
-   hera_ep = hera_commondata("./rawdata/HERA1+2_NCep_820.dat","HERACOMBNCEP820", "DIS_NCE")
-   hera_ep.write_new_commondata(Path("data_EP-SIGMARED.yaml"),
-                   Path("kinematics_EP-SIGMARED.yaml"),
-                   Path("uncertainties_EP-SIGMARED.yaml"))
+        # All uncertainties are treated as multiplicative.
+        systypes = []
+        for name in sys_uncert_col_names:
+            if name == "uncor":
+                systypes.append(("MULT", "UNCORR"))
+            else:
+                systypes.append(("MULT", "HC_" + name))
+        self.systypes = systypes
+        self.process = process
+        self.dataset_name = dataset_name
 
-if __name__ == "__main__":
-   main()
+def main():
+    hera_ep = hera_commondata("./rawdata/HERA1+2_NCep_820.dat", "HERACOMBNCEP820", "DIS_NCE")
+    hera_ep.write_new_commondata(
+        Path("data_EP-SIGMARED.yaml"),
+        Path("kinematics_EP-SIGMARED.yaml"),
+        Path("uncertainties_EP-SIGMARED.yaml"),
+    )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/nnpdf_data/nnpdf_data/commondata/HERA_NC_318GEV/filter.py b/nnpdf_data/nnpdf_data/commondata/HERA_NC_318GEV/filter.py
index df486a068a..bab4e933cd 100644
--- a/nnpdf_data/nnpdf_data/commondata/HERA_NC_318GEV/filter.py
+++ b/nnpdf_data/nnpdf_data/commondata/HERA_NC_318GEV/filter.py
@@ -1,69 +1,73 @@
-from nnpdf_data.filter_utils.hera_utils import commondata #, covmat_is_close
-from pathlib import Path
 from dataclasses import dataclass
+from os import PathLike
+from pathlib import Path
 import typing
 from typing import List
+
 import numpy as np
 import pandas as pd
-from os import PathLike
 import yaml
 
+from nnpdf_data.filter_utils.hera_utils import commondata  # , covmat_is_close
+
+
 @dataclass
 class hera_commondata(commondata):
-   def __init__(self, filename: str | PathLike, dataset_name: str,
-         process: str):
-      # Read the data.
-      file = Path(filename)
-      df = pd.read_table(file, sep=r"\s+")
-
-      # Kinematic quantieties.
-      self.central_values = df["Sigma"].to_numpy()
-      self.kinematics = df[["x", "Q2", "y"]].to_numpy()
-      self.kinematic_quantities = ["x", "Q2", "y"]
+    def __init__(self, filename: str | PathLike, dataset_name: str, process: str):
+        # Read the data.
+        file = Path(filename)
+        df = pd.read_table(file, sep=r"\s+")
 
-      # Statistical uncertainties.
-      statistical_uncertainties = df["stat"].to_numpy()
-      for iunc,unc in enumerate(statistical_uncertainties):
-         unc = self.central_values[iunc]*unc/100
-         statistical_uncertainties[iunc] = unc
-      self.statistical_uncertainties = statistical_uncertainties
+        # Kinematic quantities.
+        self.central_values = df["Sigma"].to_numpy()
+        self.kinematics = df[["x", "Q2", "y"]].to_numpy()
+        self.kinematic_quantities = ["x", "Q2", "y"]
+
+        # Statistical uncertainties.
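+        # (under copy-on-write, the pandas>=3 default, to_numpy() may return a
+        # read-only view, so an explicit copy is taken before scaling in place)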
+        statistical_uncertainties = df["stat"].to_numpy(copy=True)
+        for iunc, unc in enumerate(statistical_uncertainties):
+            unc = self.central_values[iunc] * unc / 100
+            statistical_uncertainties[iunc] = unc
+        self.statistical_uncertainties = statistical_uncertainties
 
-      # Systematic uncertainties.
-      # remove the column containing the total uncertainty excluding
-      # procedural uncertainties.
-      df = df.drop(columns=["tot_noproc"])
-      sys_uncert_col_names = list(df.columns.values)[5:]
-      self.systematic_uncertainties = df[sys_uncert_col_names].to_numpy()
-      systematic_uncertainties = df[sys_uncert_col_names].to_numpy()
-      for iunc,unc in enumerate(systematic_uncertainties):
-         unc = self.central_values[iunc]*unc/100
-         systematic_uncertainties[iunc] = unc
-      self.systematic_uncertainties = systematic_uncertainties
+        # Systematic uncertainties.
+        # remove the column containing the total uncertainty excluding
+        # procedural uncertainties.
+        df = df.drop(columns=["tot_noproc"])
+        sys_uncert_col_names = list(df.columns.values)[5:]
+        self.systematic_uncertainties = df[sys_uncert_col_names].to_numpy()
+        systematic_uncertainties = df[sys_uncert_col_names].to_numpy()
+        for iunc, unc in enumerate(systematic_uncertainties):
+            unc = self.central_values[iunc] * unc / 100
+            systematic_uncertainties[iunc] = unc
+        self.systematic_uncertainties = systematic_uncertainties
 
-      # All uncertainties are treated as multiplicative.
-      systypes = []
-      for name in sys_uncert_col_names:
-         if(name == "uncor"):
-            systypes.append(("MULT", "UNCORR"))
-         else:
-            systypes.append(("MULT", "HC_" + name))
-      self.systypes = systypes
-      self.process = process
-      self.dataset_name = dataset_name
+        # All uncertainties are treated as multiplicative.
+        systypes = []
+        for name in sys_uncert_col_names:
+            if name == "uncor":
+                systypes.append(("MULT", "UNCORR"))
+            else:
+                systypes.append(("MULT", "HC_" + name))
+        self.systypes = systypes
+        self.process = process
+        self.dataset_name = dataset_name
 
-def main():
-   hera_em = hera_commondata("./rawdata/HERA1+2_NCem.dat","HERACOMBNCEM", "DIS_NCE")
-   hera_em.write_new_commondata(Path("data_EM-SIGMARED.yaml"),
-                   Path("kinematics_EM-SIGMARED.yaml"),
-                   Path("uncertainties_EM-SIGMARED.yaml"))
-   hera_ep = hera_commondata("./rawdata/HERA1+2_NCep_920.dat","HERACOMBNCEP", "DIS_NCE")
-   hera_ep.write_new_commondata(Path("data_EP-SIGMARED.yaml"),
-                   Path("kinematics_EP-SIGMARED.yaml"),
-                   Path("uncertainties_EP-SIGMARED.yaml"))
-if __name__ == "__main__":
-   main()
+def main():
+    hera_em = hera_commondata("./rawdata/HERA1+2_NCem.dat", "HERACOMBNCEM", "DIS_NCE")
+    hera_em.write_new_commondata(
+        Path("data_EM-SIGMARED.yaml"),
+        Path("kinematics_EM-SIGMARED.yaml"),
+        Path("uncertainties_EM-SIGMARED.yaml"),
+    )
+    hera_ep = hera_commondata("./rawdata/HERA1+2_NCep_920.dat", "HERACOMBNCEP", "DIS_NCE")
+    hera_ep.write_new_commondata(
+        Path("data_EP-SIGMARED.yaml"),
+        Path("kinematics_EP-SIGMARED.yaml"),
+        Path("uncertainties_EP-SIGMARED.yaml"),
+    )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/nnpdf_data/nnpdf_data/commondata/HERA_NC_318GEV_EAVG/filter.py b/nnpdf_data/nnpdf_data/commondata/HERA_NC_318GEV_EAVG/filter.py
index 0c45dc48a9..3b7ac732c6 100644
--- a/nnpdf_data/nnpdf_data/commondata/HERA_NC_318GEV_EAVG/filter.py
+++ b/nnpdf_data/nnpdf_data/commondata/HERA_NC_318GEV_EAVG/filter.py
@@ -1,69 +1,75 @@
-from nnpdf_data.filter_utils.hera_utils import commondata #, covmat_is_close
-from pathlib import Path
 from dataclasses import dataclass
+from os import PathLike
+from pathlib import Path
 import typing
 from typing import List
+
 import numpy as np
 import pandas as pd
-from os import PathLike
 import yaml
 
+from nnpdf_data.filter_utils.hera_utils import commondata  # , covmat_is_close
+
+
 @dataclass
 class hera_cb_commondata(commondata):
-   def __init__(self, filename: str | PathLike, dataset_name: str,
-         process: str):
-      # Read the data.
-      file = Path(filename)
-      df = pd.read_table(file, sep=r"\s+",skiprows=36)
+    def __init__(self, filename: str | PathLike, dataset_name: str, process: str):
+        # Read the data.
+        file = Path(filename)
+        df = pd.read_table(file, sep=r"\s+", skiprows=36)
+
+        # Kinematic quantities.
+ self.central_values = df["Sigma"].to_numpy() + # compute y = Q2/x/S + S = 318**2 # GeV**2 + y = df["Q2"] / df["x"] / S + df.insert(1, "y", y.to_list()) + self.kinematics = df[["x", "Q2", "y"]].to_numpy() + self.kinematic_quantities = ["x", "Q2", "y"] - # Kinematic quantieties. - self.central_values = df["Sigma"].to_numpy() - # compute y = Q2/x/S - S=318**2 # GeV**2 - y=df["Q2"]/df["x"]/S - df.insert(1,"y",y.to_list()) - self.kinematics = df[["x", "Q2", "y"]].to_numpy() - self.kinematic_quantities = ["x", "Q2", "y"] + # Statistical uncertainties. + statistical_uncertainties = df["stat"].to_numpy(copy=True) + for iunc, unc in enumerate(statistical_uncertainties): + unc = self.central_values[iunc] * unc / 100 + statistical_uncertainties[iunc] = unc + self.statistical_uncertainties = statistical_uncertainties - # Statistical uncertainties. - statistical_uncertainties = df["stat"].to_numpy() - for iunc,unc in enumerate(statistical_uncertainties): - unc = self.central_values[iunc]*unc/100 - statistical_uncertainties[iunc] = unc - self.statistical_uncertainties = statistical_uncertainties + # Systematic uncertainties. + sys_uncert_col_names = list(df.columns.values)[5:] + self.systematic_uncertainties = df[sys_uncert_col_names].to_numpy() + systematic_uncertainties = df[sys_uncert_col_names].to_numpy() + for iunc, unc in enumerate(systematic_uncertainties): + unc = self.central_values[iunc] * unc / 100 + systematic_uncertainties[iunc] = unc + self.systematic_uncertainties = systematic_uncertainties - # Systematic uncertainties. - sys_uncert_col_names = list(df.columns.values)[5:] - self.systematic_uncertainties = df[sys_uncert_col_names].to_numpy() - systematic_uncertainties = df[sys_uncert_col_names].to_numpy() - for iunc,unc in enumerate(systematic_uncertainties): - unc = self.central_values[iunc]*unc/100 - systematic_uncertainties[iunc] = unc - self.systematic_uncertainties = systematic_uncertainties - - # All uncertainties are treated as multiplicative. - systypes = [] - for name in sys_uncert_col_names: - if(name == "uncor"): - systypes.append(("MULT", "UNCORR")) - else: - systypes.append(("MULT", "HC_" + name)) - self.systypes = systypes - self.process = process - self.dataset_name = dataset_name + # All uncertainties are treated as multiplicative. 
+ systypes = [] + for name in sys_uncert_col_names: + if name == "uncor": + systypes.append(("MULT", "UNCORR")) + else: + systypes.append(("MULT", "HC_" + name)) + self.systypes = systypes + self.process = process + self.dataset_name = dataset_name def main(): - hera_b = hera_cb_commondata("./rawdata/d18-037.tableBeauty.txt","HERACOMBNCEP", "DIS_NCE") - hera_b.write_new_commondata(Path("data_BOTTOM-SIGMARED.yaml"), - Path("kinematics_BOTTOM-SIGMARED.yaml"), - Path("uncertainties_BOTTOM-SIGMARED.yaml")) + hera_b = hera_cb_commondata("./rawdata/d18-037.tableBeauty.txt", "HERACOMBNCEP", "DIS_NCE") + hera_b.write_new_commondata( + Path("data_BOTTOM-SIGMARED.yaml"), + Path("kinematics_BOTTOM-SIGMARED.yaml"), + Path("uncertainties_BOTTOM-SIGMARED.yaml"), + ) - hera_c = hera_cb_commondata("./rawdata/d18-037.tableCharm.txt","HERACOMBNCEP", "DIS_NCE") - hera_c.write_new_commondata(Path("data_CHARM-SIGMARED.yaml"), - Path("kinematics_CHARM-SIGMARED.yaml"), - Path("uncertainties_CHARM-SIGMARED.yaml")) + hera_c = hera_cb_commondata("./rawdata/d18-037.tableCharm.txt", "HERACOMBNCEP", "DIS_NCE") + hera_c.write_new_commondata( + Path("data_CHARM-SIGMARED.yaml"), + Path("kinematics_CHARM-SIGMARED.yaml"), + Path("uncertainties_CHARM-SIGMARED.yaml"), + ) -if __name__ == "__main__": - main() +if __name__ == "__main__": + main() diff --git a/nnpdf_data/nnpdf_data/commondataparser.py b/nnpdf_data/nnpdf_data/commondataparser.py index 21b5b60fe1..06d9f88c53 100644 --- a/nnpdf_data/nnpdf_data/commondataparser.py +++ b/nnpdf_data/nnpdf_data/commondataparser.py @@ -625,7 +625,7 @@ def load_kinematics(self, fill_to_three=True, drop_minmax=True): for i in range(3 - ncol): dbin[f"extra_{i}"] = d - kin_dict[bin_index] = pd.DataFrame(dbin).stack() + kin_dict[bin_index] = pd.DataFrame(dbin).stack().dropna() if len(kin_dict) != self.ndata: raise ValueError( diff --git a/nnpdf_data/pyproject.toml b/nnpdf_data/pyproject.toml index 92604f2c17..404728a9d0 100644 --- a/nnpdf_data/pyproject.toml +++ b/nnpdf_data/pyproject.toml @@ -35,7 +35,7 @@ include = [ python = "^3.9" "ruamel.yaml" = "*" validobj = "*" -pandas = "<3" +pandas = "*" numpy = "*" # Required to run filters: `filter_files_dependencies` scipy = {version = "*", optional = true} diff --git a/pyproject.toml b/pyproject.toml index b35cdf7129..a561a658bc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -62,7 +62,7 @@ python = ">=3.9" matplotlib = "^3.9" pineappl = "^1.0.0" # TODO: make the code compatible with pandas 3 -pandas = "<3" +pandas = "*" numpy = "*" "ruamel.yaml" = {version = ">=0.19.1", extras=["oldlibyaml"]} validobj = "*" diff --git a/validphys2/src/validphys/covmats.py b/validphys2/src/validphys/covmats.py index 7e2d3ce0fe..f849dbf183 100644 --- a/validphys2/src/validphys/covmats.py +++ b/validphys2/src/validphys/covmats.py @@ -214,9 +214,8 @@ def dataset_inputs_covmat_from_systematics( special_corrs.append(sys_errors.loc[:, ~is_intra_dataset_error]) # concat systematics across datasets - special_sys = pd.concat(special_corrs, axis=0, sort=False) # non-overlapping systematics are set to NaN by concat, fill with 0 instead. 
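+    # (chained .fillna(0) rather than fillna(..., inplace=True): pandas
+    # discourages inplace, and it gains nothing under pandas>=3 copy-on-write)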
- special_sys.fillna(0, inplace=True) + special_sys = pd.concat(special_corrs, axis=0, sort=False).fillna(0) diag = la.block_diag(*block_diags) covmat = diag + special_sys.to_numpy() @ special_sys.to_numpy().T if use_weights_in_covmat: @@ -811,7 +810,7 @@ def reorder_thcovmat_as_expcovmat(fitthcovmat, data): tmp = theory_covmat.droplevel(0, axis=0).droplevel(0, axis=1) # old to new names mapping new_names = {d[0]: legacy_to_new_map(d[0])[0] for d in tmp.index} - tmp.rename(columns=new_names, index=new_names, level=0, inplace=True) + tmp = tmp.rename(columns=new_names, index=new_names, level=0) # reorder bb = [str(i) for i in data] return tmp.reindex(index=bb, columns=bb, level=0) diff --git a/validphys2/src/validphys/fitdata.py b/validphys2/src/validphys/fitdata.py index df04239d0f..de89b37672 100644 --- a/validphys2/src/validphys/fitdata.py +++ b/validphys2/src/validphys/fitdata.py @@ -509,8 +509,7 @@ def datasets_properties_table(data_input): dataset_property_dict["Other fields"].append( ", ".join([f"{k}: {v}" for k, v in ds_input_dict.items()]) if ds_input_dict else "-" ) - df = pd.DataFrame(dataset_property_dict) - df.set_index("Dataset", inplace=True) + df = pd.DataFrame(dataset_property_dict).set_index("Dataset") df = df[["Training fraction", "Weight", "C-factors", "Other fields"]] return df @@ -583,7 +582,5 @@ def fit_code_version(fit): @table def fits_version_table(fits_fit_code_version): """Produces a table of version information for multiple fits.""" - vtable = pd.concat(fits_fit_code_version, axis=1) # Fill NaNs with "unavailable" - vtable.fillna("unavailable", inplace=True) - return vtable + return pd.concat(fits_fit_code_version, axis=1).fillna("unavailable") diff --git a/validphys2/src/validphys/hyperoptplot.py b/validphys2/src/validphys/hyperoptplot.py index c09b42f041..591635cea7 100644 --- a/validphys2/src/validphys/hyperoptplot.py +++ b/validphys2/src/validphys/hyperoptplot.py @@ -1,6 +1,6 @@ """ - Module for the parsing and plotting of the results and output of - previous hyperparameter scans +Module for the parsing and plotting of the results and output of +previous hyperparameter scans """ # Within this file you can find the "more modern" vp-integrated hyperopt stuff @@ -586,7 +586,7 @@ def hyperopt_table(hyperopt_dataframe): filters set in the commandline arguments. 
""" dataframe, _ = hyperopt_dataframe - dataframe.sort_values(by=["loss"], inplace=True) + dataframe = dataframe.sort_values(by=["loss"]) return dataframe @@ -727,22 +727,10 @@ def plot_scans(df, best_df, plotting_parameter, include_best=True): best_df[key] = original_best.apply(lambda x: x[0]) ordering_true, best_x = order_axis(df, best_df, key=key) ax = sns.violinplot( - x=key, - y=loss_k, - data=df, - ax=ax, - palette="Set2", - cut=0.0, - order=ordering_true, + x=key, y=loss_k, data=df, ax=ax, palette="Set2", cut=0.0, order=ordering_true ) ax = sns.stripplot( - x=key, - y=loss_k, - data=df, - ax=ax, - color="gray", - order=ordering_true, - alpha=0.4, + x=key, y=loss_k, data=df, ax=ax, color="gray", order=ordering_true, alpha=0.4 ) # Finally plot the "best" one, which will be first diff --git a/validphys2/src/validphys/pseudodata.py b/validphys2/src/validphys/pseudodata.py index 137e666fca..08a4f7103e 100644 --- a/validphys2/src/validphys/pseudodata.py +++ b/validphys2/src/validphys/pseudodata.py @@ -111,9 +111,7 @@ def read_replica_pseudodata(fit, context_index, replica): new_name, _ = legacy_to_new_map(dsname) mapping[dsname] = new_name - pseudodata.rename(mapping, level=1, inplace=True) - - pseudodata.sort_index(level=range(1, 3), inplace=True) + pseudodata = pseudodata.rename(mapping, level=1).sort_index(level=range(1, 3)) pseudodata.index = sorted_index tr = pseudodata[pseudodata["type"] == "training"] diff --git a/validphys2/src/validphys/results.py b/validphys2/src/validphys/results.py index 27f320484c..f760d35544 100644 --- a/validphys2/src/validphys/results.py +++ b/validphys2/src/validphys/results.py @@ -270,8 +270,7 @@ def groups_index(groups_data, diagonal_basis=True): ) columns = ["group", "dataset", "id"] - df = pd.DataFrame(records, columns=columns) - df.set_index(columns, inplace=True) + df = pd.DataFrame(records, columns=columns).set_index(columns) return df.index diff --git a/validphys2/src/validphys/tableloader.py b/validphys2/src/validphys/tableloader.py index 3de5696fda..2b773307ec 100644 --- a/validphys2/src/validphys/tableloader.py +++ b/validphys2/src/validphys/tableloader.py @@ -79,7 +79,7 @@ def load_adapted_fits_chi2_table(filename): ndatas = dns.iloc[:, 0] f = lambda x: x[x.columns[0]] * x[x.columns[1]] - df = df.groupby(axis=1, level=0).apply(f) + df = df.T.groupby(level=0).apply(lambda x: f(x.T)).T df.columns = pd.MultiIndex.from_product([list(df.columns), ['chi2']]) return ndatas, df @@ -119,31 +119,33 @@ def combine_pseudoreplica_tables( total_chis = total.groupby(level=3).sum(min_count=1) def fixup_min_points(df): + # Since pandas 3, needs to transpose the df + # see deprecation note: https://pandas.pydata.org/pandas-docs/version/2.3/reference/api/pandas.DataFrame.groupby.html + df = df.T m = (~df.isnull()).sum(axis=1, min_count=1) >= min_points_required df[df[m].isnull()] = np.inf - return df + return df.T # The idea is: Set to inf the nans of the valid curves, so that we select # the minimum (which is not infinite). Leave the bad nans as nans, so we # write nan always for those. - total_chis = total_chis.groupby(axis=1, level=1, group_keys=False).apply(fixup_min_points) + total_chis = total_chis.T.groupby(level=1, group_keys=False).apply(fixup_min_points).T # Note, asarray is needed because it ignores NANs otherwise. 
argmin = lambda x: pd.Series(np.argmin(np.asarray(x), axis=1), index=x.index) - best_replicas = total_chis.groupby(axis=1, level=1).apply(argmin) - gb = together.groupby(axis=1, level=1) + best_replicas = total_chis.T.groupby(level=1).apply(lambda x: argmin(x.T)).T def inner_select(df, indexes): return df.iloc[:, indexes[df.name]] def select_best_replicas(df): indexes = best_replicas[df.name] - return df.groupby(level=3).apply(inner_select, indexes=indexes) + return df.T.groupby(level=3).apply(inner_select, indexes=indexes).T - res = gb.apply(select_best_replicas) + res = together.T.groupby(level=1).apply(select_best_replicas).T res.index = res.index.droplevel(0) - res.sort_index(inplace=True) + res = res.sort_index() # TODO: Why in earth did I decide to keep this?! res.columns = pd.MultiIndex.from_product((res.columns, ['chi2'])) @@ -154,8 +156,7 @@ def select_best_replicas(df): def get_extrasum_slice(df, components): """Extract a slice of a table that has the components in the format that extra_sums expects.""" - df = pd.DataFrame(df) - df.sort_index(inplace=True) + df = pd.DataFrame(df).sort_index() total_token = ' Total' keys = [ (c[: -len(total_token)], 'Total') if c.endswith(total_token) else (slice(None), c) diff --git a/validphys2/src/validphys/tests/test_weights.py b/validphys2/src/validphys/tests/test_weights.py index 7393645a17..b86ab5b054 100644 --- a/validphys2/src/validphys/tests/test_weights.py +++ b/validphys2/src/validphys/tests/test_weights.py @@ -1,6 +1,7 @@ """ test_weights.py """ + import numpy as np from validphys.api import API @@ -11,12 +12,12 @@ def test_weights_have_same_commondata(weighted_data_witht0_config): normal, weighted = data.datasets normalds, weightedds = normal.load_commondata(), weighted.load_commondata() assert ( - normalds.systematics_table["MULT"].iloc[0][0] - == weightedds.systematics_table["MULT"].iloc[0][0] + normalds.systematics_table["MULT"].iloc[0].iloc[0] + == weightedds.systematics_table["MULT"].iloc[0].iloc[0] ) assert ( - normalds.systematics_table["ADD"].iloc[0][0] - == weightedds.systematics_table["ADD"].iloc[0][0] + normalds.systematics_table["ADD"].iloc[0].iloc[0] + == weightedds.systematics_table["ADD"].iloc[0].iloc[0] ) diff --git a/validphys2/src/validphys/theorycovariance/output.py b/validphys2/src/validphys/theorycovariance/output.py index 9d69734eed..efae6bd4ed 100644 --- a/validphys2/src/validphys/theorycovariance/output.py +++ b/validphys2/src/validphys/theorycovariance/output.py @@ -48,9 +48,7 @@ def matrix_plot_labels(df): def plot_covmat_heatmap(covmat, title): """Matrix plot of a covariance matrix.""" - df = covmat - df.sort_index(axis=0, inplace=True) - df.sort_index(axis=1, inplace=True) + df = covmat.sort_index(axis=0).sort_index(axis=1) oldindex = df.index.tolist() newindex = sorted(oldindex, key=_get_key) # reindex index @@ -150,9 +148,7 @@ def _get_key(element): def plot_corrmat_heatmap(corrmat, title): """Matrix plot of a correlation matrix""" - df = corrmat - df.sort_index(axis=0, inplace=True) - df.sort_index(axis=1, inplace=True) + df = corrmat.sort_index(axis=0).sort_index(axis=1) oldindex = df.index.tolist() newindex = sorted(oldindex, key=_get_key) # reindex index @@ -341,15 +337,14 @@ def plot_diag_cov_comparison_by_experiment( fig, ax = plotutils.subplots(figsize=(20, 10)) - procs_data_values_experiment.sort_index(level=0, inplace=True) + procs_data_values_experiment = procs_data_values_experiment.sort_index(level=0) data = np.abs(procs_data_values_experiment) plot_index = procs_data_values_experiment.index 
     # plot exp values, take diagonal first
     sqrtdiags_exp = pd.DataFrame(
         np.sqrt(np.diag(experiments_covmat_no_table)), index=experiments_covmat_no_table.index
-    )
-    sqrtdiags_exp.sort_index(level=0, inplace=True)
+    ).sort_index(level=0)
     sqrtdiags_exp = sqrtdiags_exp[0] / data.values
     ax.plot(sqrtdiags_exp.values, "*", markersize=4, label="Experimental uncertanties")

From aa1caef8eed2c8b35547f965354d94d3b7c6fe83 Mon Sep 17 00:00:00 2001
From: "Juan M. Cruz-Martinez" 
Date: Tue, 27 Jan 2026 11:49:06 +0100
Subject: [PATCH 2/2] update fitbot after pandas 3 update

---
 .github/workflows/fitbot.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/fitbot.yml b/.github/workflows/fitbot.yml
index 25398253d7..4db180f7f3 100644
--- a/.github/workflows/fitbot.yml
+++ b/.github/workflows/fitbot.yml
@@ -12,7 +12,7 @@ env:
   POSTFIT_NREP: 15 # requested minimum replicas for postfit
   # IMPORTANT
   # WHEN CHANGING THE REFERENCE SET, THE NEW REFERENCE MUST BE MANUALLY UPLOADED TO THE SERVER
-  REFERENCE_SET: NNBOT-99108504e-2025-11-22 # reference set for exact results
+  REFERENCE_SET: NNBOT-1a81255f3-2026-01-27 # reference set for exact results
   STABLE_REFERENCE_SET: NNBOT-99108504e-2025-11-22 # reference set for last tag
   PYTHONHASHSEED: "0"
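
Note on the recurring pattern in the validphys changes above: pandas 3 removes
the axis=1 argument of DataFrame.groupby (deprecated since pandas 2.1, see the
deprecation note linked in tableloader.py), which is why column-wise groupbys
are now written as transpose, group, transpose back. A minimal, self-contained
sketch of the pattern (illustrative only: the frame and the fit/column labels
below are made up, not taken from the code):

    import numpy as np
    import pandas as pd

    df = pd.DataFrame(
        np.arange(12.0).reshape(3, 4),
        columns=pd.MultiIndex.from_product([["fit_a", "fit_b"], ["ndata", "chi2"]]),
    )

    # pandas < 3 grouped over columns directly:
    #     per_fit = df.groupby(axis=1, level=0).sum()
    # pandas >= 3: transpose so the grouping level lies on the rows,
    # group there, then transpose back to recover the column-wise totals
    per_fit = df.T.groupby(level=0).sum().T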