diff --git a/src/xhydro/pygmet/make_toml_config_pygmet.py b/src/xhydro/pygmet/make_toml_config_pygmet.py
new file mode 100644
index 00000000..0db06f83
--- /dev/null
+++ b/src/xhydro/pygmet/make_toml_config_pygmet.py
@@ -0,0 +1,256 @@
+"""
+Hard-coded TOML template writer.
+
+This module writes a new TOML config file based on a fixed template, replacing only
+the fields marked with "# MAKE_DYNAMIC" in the original template.
+"""
+
+from __future__ import annotations
+from collections.abc import Mapping, Sequence
+from dataclasses import dataclass
+from datetime import date, datetime
+from pathlib import Path
+from typing import Any
+
+
+@dataclass(frozen=True)
+class RawToml:
+    """Inject TOML verbatim (advanced use)."""
+
+    text: str
+
+
+def _toml_escape_single_quoted(s: str) -> str:
+    """
+    Quote a string for TOML, handling embedded single quotes.
+
+    Parameters
+    ----------
+    s : str
+        String that may contain single quotes and needs to be quoted.
+
+    Returns
+    -------
+    str :
+        The quoted string, safe to write as a TOML value.
+    """
+    # TOML literal (single-quoted) strings cannot contain single quotes at all,
+    # so fall back to a basic (double-quoted) string when one is present.
+    if "'" in s:
+        return '"' + s.replace("\\", "\\\\").replace('"', '\\"') + '"'
+    return "'" + s + "'"
+
+
+def to_toml(value: Any) -> str:
+    """
+    Convert common Python types to TOML literals.
+
+    Parameters
+    ----------
+    value : Any
+        Value to convert to a TOML literal.
+
+    Returns
+    -------
+    str :
+        The string literal writable to a TOML file.
+    """
+    if isinstance(value, RawToml):
+        return value.text
+    if isinstance(value, bool):
+        return "true" if value else "false"
+    if isinstance(value, int):
+        return str(value)
+    if isinstance(value, float):
+        return repr(value)
+    if isinstance(value, (datetime, date)):
+        return _toml_escape_single_quoted(value.isoformat())
+    if isinstance(value, str):
+        return _toml_escape_single_quoted(value)
+    if isinstance(value, (list, tuple)):
+        return "[" + ", ".join(to_toml(v) for v in value) + "]"
+    if value is None:
+        # TOML has no null; choose what your application expects.
+        return '""'
+    raise TypeError(f"Unsupported type for TOML conversion: {type(value).__name__}")
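Reviewer note: a few spot checks of the conversions `to_toml` performs (a sketch; each assertion follows directly from the branches above):

from datetime import date

from xhydro.pygmet.make_toml_config_pygmet import to_toml

assert to_toml(True) == "true"
assert to_toml(3.5) == "3.5"
assert to_toml("My_case_01") == "'My_case_01'"
assert to_toml(["precip", "tmin"]) == "['precip', 'tmin']"
assert to_toml(date(2024, 12, 31)) == "'2024-12-31'"
assert to_toml(None) == '""'  # TOML has no null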
+
+
+# Keys whose values should be provided at runtime:
+DYNAMIC_KEYS: Sequence[str] = (
+    "case_name",
+    "num_processes",
+    "modelsettings_file",
+    "input_stn_all",
+    "infile_grid_domain",
+    "outpath_parent",
+    "date_start",
+    "date_end",
+    "input_vars",
+    "target_vars",
+    "target_vars_WithProbability",
+    "minRange_vars",
+    "maxRange_vars",
+    "transform_vars",
+    "predictor_name_static_stn",
+    "predictor_name_static_grid",
+    "ensemble_end",
+    "clen",  # codespell:ignore clen
+    "auto_corr_method",
+    "target_vars_max_constrain",
+)
+
+# The hard-coded template (generated from testcase.config.static_withTemperature.toml):
+TOML_TEMPLATE = """\
+########################################################################################################################
+# TEMPLATE FILE TO RUN PyGMET for downscaling / ensemble generation
+########################################################################################################################
+
+case_name = {case_name}
+
+num_processes = {num_processes}
+
+# model settings file
+modelsettings_file = {modelsettings_file}
+
+########################################################################################################################
+# Input and output information
+########################################################################################################################
+
+input_stn_all = {input_stn_all}
+
+infile_grid_domain = {infile_grid_domain}
+
+outpath_parent = {outpath_parent}
+
+# file list containing dynamic predictor inputs (optional). Giving it a nonsense string (e.g., "NA") turns off dynamic
+# predictors
+dynamic_predictor_filelist = "NA"
+
+########################################################################################################################
+# Period to process
+########################################################################################################################
+
+date_start = {date_start}
+date_end = {date_end}
+
+########################################################################################################################
+# Variables to process
+########################################################################################################################
+
+input_vars = {input_vars}
+
+# Target variables to output; should always contain precip as the first variable
+target_vars = {target_vars}
+
+# Input variables may need some conversion to get target variables. If an input_var and a target_var have the same
+# variable name, there is no need to add a mapping relation
+mapping_InOut_var = []
+
+# Target variables that will get an additional output probability of exceedance (P(exc) and P(non exc))
+target_vars_WithProbability = {target_vars_WithProbability}
+
+# Set the minimum and maximum range for each variable, must be same order as input_vars
+minRange_vars = {minRange_vars}
+maxRange_vars = {maxRange_vars}
+
+############################## dynamic predictors for regression
+# only useful when dynamic_predictor_filelist is valid
+
+# dynamic predictors for each target_vars. Empty list means no dynamic predictors
+dynamic_predictor_name = []
+
+# Dynamic predictors may need some processing; two keywords: "interp" and "transform"
+# Example "cube_root_prec_rate:interp=linear:transform=boxcox"
+dynamic_predictor_operation = []
+
+
+# Transformation to use for each variable, can be any of:
+# 'log', 'logit', 'direct', 'sqrt', 'cbrt', 'boxcox', 'yeojohnson', 'standardize'
+transform_vars = {transform_vars}
+
+########################################################################################################################
+# Static predictors
+########################################################################################################################
+
+# Names of variables in the input_stn_all file (static predictors)
+predictor_name_static_stn = {predictor_name_static_stn}
+
+# Names of variables in the infile_grid_domain file (static predictors)
+predictor_name_static_grid = {predictor_name_static_grid}
+
+########################################################################################################################
+# Stochastic and correlation settings
+########################################################################################################################
+
+# run ensemble or not: true or false
+ensemble_flag = true
+
+# ensemble settings; ensemble_end is the number of members to generate (inclusive end index)
+ensemble_start = 1
+ensemble_end = {ensemble_end}
+
+# link variables for random number generation dependence
+linkvar = []
+
+# Correlation length for each variable (same order as input_vars)
+clen = {clen}  # codespell:ignore clen
+lag1_auto_cc = [-9999, -9999, -9999]  # corresponding to target_vars
+cross_cc = []  # corresponding to linkvar
+
+# Method for auto-correlation for each variable (same order as input_vars)
+auto_corr_method = {auto_corr_method}
+
+########################################################################################################################
+# Constraints
+########################################################################################################################
+rolling_window = 31  # 31-day rolling mean to remove the monthly cycle
+# Max constrain list (subset of target_vars)
+target_vars_max_constrain = {target_vars_max_constrain}
+"""
+
+
+def render_config_toml(params: Mapping[str, Any], strict: bool = True) -> str:
+    """
+    Render the template into a TOML config string.
+
+    Parameters
+    ----------
+    params : Mapping[str, Any]
+        The mapping of parameters to write to the file, replacing the template tags.
+    strict : bool
+        If True, all DYNAMIC_KEYS must be present in `params`.
+
+    Returns
+    -------
+    str :
+        The full template with tags replaced.
+    """
+    missing = [k for k in DYNAMIC_KEYS if k not in params]
+    if strict and missing:
+        raise KeyError(f"Missing replacements for keys: {missing}")
+
+    mapping = {k: to_toml(params[k]) for k in DYNAMIC_KEYS if k in params}
+
+    # format_map will raise KeyError if any placeholder isn't present; the strict check above prevents that.
+    return TOML_TEMPLATE.format_map(mapping)
+
+
+def write_config_toml(output_path: str | Path, params: Mapping[str, Any], strict: bool = True) -> bool:
+    """
+    Write a fresh model config TOML file from the hard-coded template.
+
+    Parameters
+    ----------
+    output_path : str or Path
+        Path to the filled-in template file we want to write to disk.
+    params : Mapping[str, Any]
+        The mapping of parameters to write to the file, replacing the template tags.
+    strict : bool
+        If True, all DYNAMIC_KEYS must be present in `params`.
+
+    Returns
+    -------
+    bool :
+        True if the function completes without error.
+    """
+    output_path = Path(output_path)
+    output_path.write_text(render_config_toml(params, strict=strict), encoding="utf-8")
+    return True
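Reviewer note: a minimal usage sketch (the filename is illustrative; tests/test_pygmet_prep.py below exercises the full parameter set):

from xhydro.pygmet.make_toml_config_pygmet import write_config_toml

params = {
    "case_name": "My_case_01",
    "num_processes": 10,
    # ... every remaining key in DYNAMIC_KEYS must also be present when strict=True ...
}
try:
    write_config_toml("model.config_xhydro.toml", params, strict=True)
except KeyError as err:
    print(err)  # lists the DYNAMIC_KEYS still missing from `params`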
diff --git a/src/xhydro/pygmet/make_toml_settings_pygmet.py b/src/xhydro/pygmet/make_toml_settings_pygmet.py
new file mode 100644
index 00000000..3cc924d1
--- /dev/null
+++ b/src/xhydro/pygmet/make_toml_settings_pygmet.py
@@ -0,0 +1,234 @@
+"""
+Hard-coded template writer for the PyGMET settings file.
+
+This module writes a new TOML settings file from a fixed template, replacing templated values with desired values.
+"""
+
+from __future__ import annotations
+from collections.abc import Mapping, Sequence
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any
+
+
+@dataclass(frozen=True)
+class RawToml:
+    """Inject TOML verbatim."""
+
+    text: str
+
+
+def _toml_escape_single_quoted(s: str) -> str:
+    """
+    Quote a string for TOML, handling embedded single quotes.
+
+    Parameters
+    ----------
+    s : str
+        String that may contain single quotes and needs to be quoted.
+
+    Returns
+    -------
+    str :
+        The quoted string, safe to write as a TOML value.
+    """
+    # TOML literal (single-quoted) strings cannot contain single quotes at all,
+    # so fall back to a basic (double-quoted) string when one is present.
+    if "'" in s:
+        return '"' + s.replace("\\", "\\\\").replace('"', '\\"') + '"'
+    return "'" + s + "'"
+
+
+def to_toml(value: Any) -> str:
+    """
+    Convert common Python types to TOML literals.
+
+    Parameters
+    ----------
+    value : Any
+        Value to convert to a TOML literal.
+
+    Returns
+    -------
+    str :
+        The string literal writable to a TOML file.
+    """
+    if isinstance(value, RawToml):
+        return value.text
+    if isinstance(value, bool):
+        return "true" if value else "false"
+    if isinstance(value, int):
+        return str(value)
+    if isinstance(value, float):
+        return repr(value)
+    if isinstance(value, str):
+        return _toml_escape_single_quoted(value)
+    if isinstance(value, (list, tuple)):
+        return "[" + ", ".join(to_toml(v) for v in value) + "]"
+    if value is None:
+        return "''"
+    raise TypeError(f"Unsupported type for TOML conversion: {type(value).__name__}")
+
+
+DYNAMIC_KEYS: Sequence[str] = (
+    "stn_lat_name",
+    "stn_lon_name",
+    "grid_lat_name",
+    "grid_lon_name",
+    "grid_mask_name",
+    "dynamic_grid_lat_name",
+    "dynamic_grid_lon_name",
+    "nearstn_min",
+    "nearstn_max",
+    "try_radius",
+    "initial_distance",
+)
+
+TOML_TEMPLATE = """\
+# Some default settings
+
+########################################################################################################################
+# general settings
+########################################################################################################################
+
+# master seed: controls all probabilistic processes (e.g., probabilistic estimation, machine learning methods)
+# a negative value means random generation without reproducibility
+master_seed = 20230104
+
+########################################################################################################################
+# settings for gridded estimation using regression or machine learning methods
+########################################################################################################################
+
+############################## station/grid file dimension names
+# the spatial dims of input stations
+stn_lat_name = {stn_lat_name}
+stn_lon_name = {stn_lon_name}
+
+# the 2D spatial dims of the target grid domain
+# note that the target grid domain netcdf must have x/y dims, while lat and lon are 2D arrays
+grid_lat_name = {grid_lat_name}
+grid_lon_name = {grid_lon_name}
+grid_mask_name = {grid_mask_name}
+
+# the 2D spatial dims of dynamic predictor inputs
+dynamic_grid_lat_name = {dynamic_grid_lat_name}
+dynamic_grid_lon_name = {dynamic_grid_lon_name}
+
+########################################################################################################################
+# default settings
+# these settings matter as much as the ones above, but using their default values does not affect the model run in any case
+########################################################################################################################
+
+# gridding methods: locally weighted regression and machine learning methods.
+# The sklearn module is used to support most functions: https://scikit-learn.org/stable/supervised_learning.html
+# Locally weighted regression:
+# Two original methods are LWR:Linear and LWR:Logistic.
+# Sklearn-based methods support simple usage with "model.fit()" and "model.predict" or "model.predict_prob",
+# in the format LWR:linear_model.METHOD
+# Examples of METHOD are LinearRegression, LogisticRegression, Ridge, BayesianRidge, ARDRegression, Lasso, ElasticNet, Lars, etc.
+# Global regression using machine learning methods:
+# Machine learning methods are supported by sklearn. Parameters of methods supported by sklearn can be defined at the bottom of this
+# configuration file (optional)
+# Examples: ensemble.RandomForestRegressor, ensemble.RandomForestClassifier, neural_network.MLPRegressor, neural_network.MLPClassifier,
+# ensemble.GradientBoostingClassifier, ensemble.GradientBoostingRegressor
+# The parameters of sklearn methods can be defined in the [sklearn] section
+gridcore_continuous = 'LWR:Linear'
+gridcore_classification = 'LWR:Logistic'  # for probability of event
+n_splits = 10  # only useful for machine learning methods; cross validation to generate uncertainty estimates.
+
+# output random fields
+output_randomfield = false
+
+# Number of stations to consider for each target point. nearstn_min <= nearstn_max.
+nearstn_min = {nearstn_min}  # nearby stations: minimum number
+nearstn_max = {nearstn_max}  # nearby stations: maximum number
+
+# First try this radius (km); if there are not enough stations, expand. Can reduce computation time for a large domain search.
+try_radius = {try_radius}
+
+# overwrite existing files
+overwrite_stninfo = true
+overwrite_station_cc = true
+overwrite_weight = true
+overwrite_cv_reg = true
+overwrite_grid_reg = true
+overwrite_ens = true
+overwrite_spcorr = true
+
+########################################################################################################################
+# distance-based weight calculation
+########################################################################################################################
+
+initial_distance = {initial_distance}  # initial search distance in km (expanded if need be)
+
+# Weight calculation formula. Only two variables/parameters are allowed in the formula:
+# dist: distance between points (km in the script)
+# maxdist (optional): max(initial_distance, max(dist) + 1), which is a parameter used in weight calculation
+# 3 is the exponential factor and is the default parameter
+weight_formula = '(1 - (dist / maxdist) ** 3) ** 3'
+
+
+########################################################################################################################
+# method-related settings
+# default values can be used directly
+########################################################################################################################
+
+[transform]
+# note: the name must be consistent with transform_vars
+[transform.boxcox]
+exp = 4
+
+[sklearn]
+# If no parameters are provided, or if the section does not even exist, default parameters will be used.
+# Just provide the method name; there is no need to include the submodule name.
+[sklearn.RandomForestRegressor]
+n_estimators = 500  # an example of a RandomForestRegressor parameter
+n_jobs = 5
+[sklearn.RandomForestClassifier]
+n_estimators = 500  # an example of a RandomForestClassifier parameter
+n_jobs = 5
+"""
+
+
+def render_model_settings_toml(params: Mapping[str, Any], strict: bool = True) -> str:
+    """
+    Render the template into a TOML settings string.
+
+    Parameters
+    ----------
+    params : Mapping[str, Any]
+        The mapping of parameters to write to the file, replacing the template tags.
+    strict : bool
+        If True, all DYNAMIC_KEYS must be present in `params`.
+
+    Returns
+    -------
+    str :
+        The full template with tags replaced.
+    """
+    missing = [k for k in DYNAMIC_KEYS if k not in params]
+    if strict and missing:
+        raise KeyError(f"Missing replacements for keys: {missing}")
+
+    mapping = {k: to_toml(params[k]) for k in DYNAMIC_KEYS if k in params}
+    return TOML_TEMPLATE.format_map(mapping)
+
+
+def write_settings_toml(output_path: str | Path, params: Mapping[str, Any], strict: bool = True) -> bool:
+    """
+    Write a fresh model settings TOML file from the hard-coded template.
+
+    Parameters
+    ----------
+    output_path : str or Path
+        Path to the filled-in template file we want to write to disk.
+    params : Mapping[str, Any]
+        The mapping of parameters to write to the file, replacing the template tags.
+    strict : bool
+        If True, all DYNAMIC_KEYS must be present in `params`.
+
+    Returns
+    -------
+    bool :
+        True if the function completes without error.
+    """
+    output_path = Path(output_path)
+    output_path.write_text(render_model_settings_toml(params, strict=strict), encoding="utf-8")
+    return True
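Reviewer note: the default `weight_formula` above is the tricube kernel commonly used for distance weighting. A minimal numeric sketch of what the formula computes, using the `maxdist` definition from the template comment (values are illustrative):

import numpy as np

dist = np.array([10.0, 50.0, 150.0])        # distances to nearby stations (km)
maxdist = max(200.0, dist.max() + 1)        # max(initial_distance, max(dist) + 1)
weights = (1 - (dist / maxdist) ** 3) ** 3  # tricube: 1 at dist=0, 0 at dist=maxdist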
diff --git a/src/xhydro/pygmet/subsample_vector_format_stations.py b/src/xhydro/pygmet/subsample_vector_format_stations.py
new file mode 100644
index 00000000..dd473326
--- /dev/null
+++ b/src/xhydro/pygmet/subsample_vector_format_stations.py
@@ -0,0 +1,102 @@
+"""Subsampling tools to select stations for PyGMET variability."""
+
+import numpy as np
+import xarray as xr
+
+
+def isel_every_about_n(
+    path_to_nc: str,
+    dim: str,
+    outpath: str,
+    rng: int | None,
+    threshold: float = 0.1,
+    n: int = 10,
+    jitter: int = 1,
+) -> bool:
+    """
+    Select along `dim` with step sizes drawn from {n-jitter, …, n+jitter}. Works for Dataset or DataArray.
+
+    Parameters
+    ----------
+    path_to_nc : str
+        Path to the file containing the dataset of the full data series (lat/long) that we want to subsample.
+    dim : str
+        Dimension to operate the subsampling on, e.g., stn for stations in a vector of stations.
+    outpath : str
+        Path to the output file to write to disk.
+    rng : int or None
+        Initial seed for the random number generator. Using None makes the draws non-reproducible.
+    threshold : float
+        Threshold below which precipitation is considered trace, so that machine-precision drizzle from the
+        reanalysis models does not influence the distribution.
+    n : int
+        Average interval length to jump from one sampling point to the next.
+    jitter : int
+        Noise added to the interval to randomize the search pattern and avoid systematically sampling every n points.
+
+    Returns
+    -------
+    bool :
+        True if the function exited properly.
+    """
+    # Clean precip (threshold and format)
+    ds = clean_precip(path_to_nc, threshold=threshold)
+
+    # Seed the random number generator.
+    rng = np.random.default_rng(rng)
+
+    # Maximum available value to sample.
+    k = ds.sizes[dim]
+
+    # Draw steps until we pass the end; then build cumulative indices.
+    steps = []
+    start = 0
+
+    while True:
+        step = n + rng.integers(-jitter, jitter + 1)
+        if step <= 0:
+            continue  # guard against pathological jitter
+        steps.append(step)
+        if start + sum(steps) >= k:
+            break
+    idx = start + np.cumsum([0] + steps[:-1])  # starting index + cumulative steps
+    idx = idx[idx < k]
+
+    # Build and save the dataset with the dimension subsampled according to the built index.
+    subset = ds.isel({dim: idx})
+    subset.to_netcdf(outpath)
+
+    return True
+
+
+def clean_precip(
+    ds_path: str,
+    threshold: float = 0.1,
+) -> xr.Dataset:
+    """
+    Convert to float32 and zero out precipitation below the trace threshold (including negative values).
+
+    Parameters
+    ----------
+    ds_path : str
+        Path to the netcdf file containing the precip to load and clean.
+    threshold : float
+        Threshold below which precipitation is considered trace, so that machine-precision drizzle from the
+        reanalysis models does not influence the distribution.
+
+    Returns
+    -------
+    xr.Dataset :
+        The cleaned precipitation dataset.
+    """
+    # Load the precipitation dataset.
+    ds = xr.open_dataset(ds_path)
+
+    # Convert to float32.
+    ds["precip"] = ds["precip"].astype(np.float32)
+
+    # Apply the threshold for the minimum trace value.
+    ds["precip"].values[ds["precip"].values < threshold] = 0.0
+
+    # Return the cleaned dataset
+    return ds
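Reviewer note: the jittered-step index construction in `isel_every_about_n` can be previewed standalone. This sketch repeats the same draw logic outside xarray (toy `k`; the values are illustrative, not the fixture expectations used in the tests below):

import numpy as np

rng = np.random.default_rng(42)
k, n, jitter = 100, 10, 3
steps, total = [], 0
while total < k:
    step = int(n + rng.integers(-jitter, jitter + 1))  # step drawn from {7, ..., 13}
    if step <= 0:
        continue  # same guard as in the function
    steps.append(step)
    total += step
idx = np.cumsum([0] + steps[:-1])  # selected indices, spaced ~n apart
idx = idx[idx < k]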
diff --git a/src/xhydro/pygmet/transform_to_station_order.py b/src/xhydro/pygmet/transform_to_station_order.py
new file mode 100644
index 00000000..6d5d87c0
--- /dev/null
+++ b/src/xhydro/pygmet/transform_to_station_order.py
@@ -0,0 +1,174 @@
+"""Functions for processing OI data into formats used by PyGMET."""
+
+import numpy as np
+import xarray as xr
+
+
+def convert_2d_nc_to_1d_stations(path_nc_oi_precip: str, path_nc_grid_temperature: str, outpath: str) -> bool:
+    """
+    Transform the 2D ERA5Land data into a 1D station-like format.
+
+    Parameters
+    ----------
+    path_nc_oi_precip : str
+        Path to the nc file containing the output of the optimal interpolation results.
+    path_nc_grid_temperature : str
+        Path to the nc file containing the gridded temperature matching the times and locations of the OI precip
+        product.
+    outpath : str
+        Path to the output file containing the NaN-less station-like vector netcdf of ERA5Land + OI grids (used as
+        stations in PyGMET).
+
+    Returns
+    -------
+    bool :
+        True if the function completes without error.
+    """
+    # NetCDF file with ERA5Land precipitation processed with OI (only precip)
+    ds = xr.open_dataset(path_nc_oi_precip)
+
+    # The OI code returns percentiles, so take the median in that particular case.
+    if "percentile" in ds:
+        ds = ds.sel(percentile=50)
+
+    # Transpose to work with data in the correct dimension order.
+    ds = ds.transpose("longitude", "latitude", "time")
+
+    # NetCDF file with ERA5Land tmin and tmax.
+    dstemp = xr.open_dataset(path_nc_grid_temperature)
+
+    # Get the same times.
+    dstemp = dstemp.sel(time=slice(ds["time"].values[0], ds["time"].values[-1]))
+
+    # We want to create a mask of NaN values. Precipitation processed with OI will not have NaNs, so we need to use the
+    # temperature to find them.
+    maskvar = np.empty((len(ds.longitude), len(ds.latitude)))
+    maskvar[:] = 1
+    temp_c = dstemp["tasmax"].transpose("longitude", "latitude", "time")
+
+    # Find the NaN values from a single day; day 10 is used rather than the first as an arbitrary but safer choice.
+    nanmask = temp_c[:, :, 10].isnull()
+    maskvar[nanmask] = 0
+
+    # Make the NaN mask 3D for application to the initial dataset of lat x lon x time.
+    mask_3d_broadcast = np.broadcast_to(nanmask, [ds["tp"].shape[2], nanmask.shape[0], nanmask.shape[1]])
+    mask_3d_broadcast = mask_3d_broadcast.transpose(1, 2, 0)
+
+    # Apply the mask to precipitation. Recall that temperature already has the NaNs.
+    ds["tp"].values = ds["tp"].where(~mask_3d_broadcast, np.nan)
+
+    # Stack into 1D stations by unrolling the 3D matrices into 2D (station x time) matrices. Do this for the precip and
+    # also the temperature.
+    da_stn = ds["tp"].stack(stn=("latitude", "longitude"))
+    da_stn_tmax = dstemp["tasmax"].stack(stn=("latitude", "longitude"))
+    da_stn_tmin = dstemp["tasmin"].stack(stn=("latitude", "longitude"))
+
+    # Get the latitude and longitude values associated with each station.
+    lat_vals = da_stn.indexes["stn"].get_level_values("latitude")
+    lon_vals = da_stn.indexes["stn"].get_level_values("longitude")
+
+    # Remove stations that are all NaNs. Because the NaN mask is constant in time, dropping stations with any NaN
+    # below is equivalent to dropping the all-NaN stations found here.
+    nan_columns = da_stn.isnull().all(dim="time").values
+    valid_mask = ~nan_columns
+    lat_vals = lat_vals[valid_mask]
+    lon_vals = lon_vals[valid_mask]
+
+    da_stn = da_stn.dropna(dim="stn", how="any")
+    da_stn_tmax = da_stn_tmax.dropna(dim="stn", how="any")
+    da_stn_tmin = da_stn_tmin.dropna(dim="stn", how="any")
+
+    # Rebuild the index of stations starting at 0.
+    da_stn = da_stn.assign_coords(stn=np.arange(da_stn.sizes["stn"]))
+
+    # Make the dataset and write it to disk.
+    ds = xr.Dataset(
+        {
+            "precip": (("stn", "time"), da_stn.values.transpose()),
+            "tmax": (("stn", "time"), da_stn_tmax.values.transpose()),
+            "tmin": (("stn", "time"), da_stn_tmin.values.transpose()),
+            "latitude": (("stn",), lat_vals),
+            "longitude": (("stn",), lon_vals),
+        },
+        coords={"time": ds.time.values, "stn": np.arange(0, da_stn.sizes["stn"])},
+    )
+
+    ds["stnid"] = ds["stn"]
+    ds.to_netcdf(outpath)
+
+    return True
+
+
+def make_target_pygmet_grid(
+    ds_in_path: str,
+    outpath: str,
+) -> bool:
+    """
+    Build the netcdf Dataset required by PyGMET to use as the output grid.
+
+    Parameters
+    ----------
+    ds_in_path : str
+        Path to the template dataset to use as the basis for the 2D grid.
+    outpath : str
+        Path to the 2D mask file.
+
+    Returns
+    -------
+    bool :
+        True if the function exits with no error.
+    """
+    # Open the dataset
+    ds_in = xr.open_dataset(ds_in_path)
+    lat_grid = ds_in["latitude"].values
+    lon_grid = ds_in["longitude"].values
+
+    # Define some fixed parameters
+    nparam = 1
+
+    # Number of latitudes and longitudes (i.e., size of grid)
+    ny = lat_grid.shape[0]
+    nx = lon_grid.shape[0]
+
+    # Indices of the coordinates, e.g., [0]
+    param = np.arange(nparam)
+
+    # Create the variables
+    # step for longitude and latitude
+    dx = np.full((nparam,), np.round(ds_in.longitude[1].values - ds_in.longitude[0].values, 5))
+    dy = np.full((nparam,), np.round(ds_in.latitude[1].values - ds_in.latitude[0].values, 5))
+
+    # Origins for latitude and longitude (from where the steps begin)
+    startx = np.array(np.round(ds_in.longitude.values[0], 5)).reshape(1)  # longitude origin
+    starty = np.array(np.round(ds_in.latitude.values[0], 5)).reshape(1)  # latitude origin
+
+    # Rebuild 2D arrays of lat/lon
+    lon2d, lat2d = np.meshgrid(lon_grid, lat_grid)
+
+    # Dimensions (lat, lon)
+    elev = np.ones((ny, nx))  # altitude (m); PyGMET can use it as an extra covariate, but we don't.
+
+    # Make a mask based on the position of NaNs in the original gridded dataset.
+    mask = ds_in.tasmax.isel(time=1).transpose("latitude", "longitude").notnull().astype(int).values
+
+    # Build the xarray Dataset
+    ds = xr.Dataset(
+        data_vars={
+            "dx": (("param",), dx),
+            "dy": (("param",), dy),
+            "startx": (("param",), startx),
+            "starty": (("param",), starty),
+            "elev": (("y", "x"), elev),
+            "latitude": (("y", "x"), lat2d),
+            "longitude": (("y", "x"), lon2d),
+            "mask": (("y", "x"), mask),
+        },
+        # Add coordinates
+        coords={
+            "param": param,
+            "y": lat_grid,
+            "x": lon_grid,
+        },
+    )
+
+    # Write to disk
+    ds.to_netcdf(outpath)
+
+    return True
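Reviewer note: the grid-to-station flattening in `convert_2d_nc_to_1d_stations` leans on `DataArray.stack`, which turns the lat/lon dims into a single MultiIndexed `stn` dim. A small self-contained sketch of the same pattern (toy values):

import numpy as np
import xarray as xr

da = xr.DataArray(
    np.arange(12.0).reshape(2, 3, 2),
    dims=("longitude", "latitude", "time"),
    coords={"longitude": [-78.0, -77.9], "latitude": [50.0, 49.9, 49.8]},
)
stacked = da.stack(stn=("latitude", "longitude"))  # dims become (time, stn); stn is a MultiIndex
lats = stacked.indexes["stn"].get_level_values("latitude")  # per-station latitude, as in the function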
diff --git a/src/xhydro/testing/helpers.py b/src/xhydro/testing/helpers.py
index ebd31e25..8244bd30 100644
--- a/src/xhydro/testing/helpers.py
+++ b/src/xhydro/testing/helpers.py
@@ -29,7 +29,7 @@
     "populate_testing_data",
 ]
 
-default_testdata_version = "v2025.8.14"
+default_testdata_version = "v2026.02.04"
 """Default version of the testing data to use when fetching datasets."""
 
 default_testdata_repo_url = "https://raw.githubusercontent.com/hydrologie/xhydro-testdata/"
diff --git a/src/xhydro/testing/registry.txt b/src/xhydro/testing/registry.txt
index 4152058c..2f69afb6 100644
--- a/src/xhydro/testing/registry.txt
+++ b/src/xhydro/testing/registry.txt
@@ -15,6 +15,11 @@ optimal_interpolation/OI_data.zip sha256:9cd881a19fc82bda560e636d3f6a2c40718b82f
 optimal_interpolation/OI_data_corrected.zip sha256:48ee08325bd35c6bce5c0e52e3ee25df27c830720929060f607fb0417c476941
 pmp/CMIP.CCCma.CanESM5.historical.r1i1p1f1.day.gn.zarr.zip sha256:d5775b2f09381f2f3a7cc06af76f62fb216b60252aab3a602280a513554d59ad
 pmp/CMIP.CCCma.CanESM5.historical.r1i1p1f1.fx.gn.zarr.zip sha256:dc7c92fc098ca5adf43e76b25e3f79b70815ed961e945952983bb68b3c380cf1
+precip_oi/ERA5Land_with_OI_flags_validation_subset.nc sha256:50fc8d562443efe82998735c50268019532493bc02b47e4a47f6672708c87b5f
+precip_oi/ERA5_land_3_variables_raw_subset.nc sha256:09ee75f73bb4e88bddb6c9184245766ebe8cc6b13b58ef58a4754e523138ff1f
+precip_oi/stations_flags_clean_subset.nc sha256:03513b859e6c99ab02728f49b0e3e7382375e7311c68e33c8d59d6f734eb08d1
+pygmet/subset_grid_temperature.nc sha256:66a3086d9aaacbedac6633050380b9b7685407c952a3f8629a16caea4d1ef8db
+pygmet/subset_oi_tp.nc sha256:1cddcb439168891e4ad025c7708c6d0ce1017bbb013ac2fe46589dc3fb5953da
 ravenpy/Debit_Riviere_Rouge.nc sha256:d0a27de5eb3cb466e60669d894296bcbc4e9f590edc1ae2490685babd10b2d22
 ravenpy/ERA5_Riviere_Rouge_global.nc sha256:341ac746130a0d3e3189d3a41dc8528d6bd22869a519b68e134959407ad200a3
 ravenpy/hru_subset.zip sha256:969b4a0d0c1f0c24dbb5baca98676f2573178cd58ef4162aa6b9fce51ceecf06
diff --git a/tests/test_pygmet_prep.py b/tests/test_pygmet_prep.py
new file mode 100644
index 00000000..7491900c
--- /dev/null
+++ b/tests/test_pygmet_prep.py
@@ -0,0 +1,224 @@
+import tempfile
+
+import numpy as np
+import pandas as pd
+import xarray as xr
+from numpy.ma.testutils import assert_almost_equal
+
+from xhydro.pygmet.make_toml_config_pygmet import write_config_toml
+from xhydro.pygmet.make_toml_settings_pygmet import write_settings_toml
+from xhydro.pygmet.subsample_vector_format_stations import isel_every_about_n
+from xhydro.pygmet.transform_to_station_order import convert_2d_nc_to_1d_stations, make_target_pygmet_grid
+
+
+def test_make_pygmet_config():
+    params = {
+        "case_name": "My_case_01",
+        "num_processes": 10,
+        "modelsettings_file": "model.settings_xhydro.toml",
+        "input_stn_all": "./stations.nc",
+        "infile_grid_domain": "./grid_domain.nc",
+        "outpath_parent": "./output_dir",
+        "date_start": "1970-01-01",
+        "date_end": "2024-12-31",
+        "input_vars": ["precip", "tmin", "tmax"],
+        "target_vars": ["precip", "tmin", "tmax"],
+        "target_vars_WithProbability": ["precip"],
+        "minRange_vars": [0.0, -70.0, -70.0],
+        "maxRange_vars": [250.0, 70.0, 70.0],
+        "transform_vars": ["boxcox", "", ""],
+        "predictor_name_static_stn": ["latitude", "longitude"],
+        "predictor_name_static_grid": ["latitude", "longitude"],
+        "ensemble_end": 100,
+        "clen": [150, 800, 800],  # codespell:ignore clen
+        "auto_corr_method": ["direct", "anomaly", "anomaly"],
+        "target_vars_max_constrain": ["precip"],
+    }
+    tempout = tempfile.mkdtemp()
+    outpath = tempout + "/model.config_xhydro.toml"
+    write_config_toml(outpath, params, strict=True)
+
+
+def test_make_pygmet_settings():
+    params = {
+        "stn_lat_name": "latitude",
+        "stn_lon_name": "longitude",
+        "grid_lat_name": "latitude",
+        "grid_lon_name": "longitude",
+        "grid_mask_name": "mask",
+        "dynamic_grid_lat_name": "latitude",
+        "dynamic_grid_lon_name": "longitude",
+        "nearstn_min": 2,
+        "nearstn_max": 16,
+        "try_radius": 200,
+        "initial_distance": 200,
+    }
+    tempout = tempfile.mkdtemp()
+    outpath = tempout + "/model.settings_xhydro.toml"
+    write_settings_toml(outpath, params, strict=True)
+
+
+class TestPreparePygmetInputs:
+    # Prepare the data for the subset OI precip grid.
+    # Coordinates
+    percentile = np.array([50], dtype=np.int64)
+    time = pd.date_range("1970-01-01", periods=15, freq="D")
+    latitude = np.round(50.0 - 0.1 * np.arange(10), 1).astype(np.float64)
+    longitude = np.round(-78.0 + 0.1 * np.arange(10), 1).astype(np.float64)
+
+    # Repeatable
+    rng = np.random.default_rng(12345)  # seed -> repeatable
+    tp = rng.random((len(percentile), len(time), len(latitude), len(longitude))).astype(np.float64)
+
+    # Build dataset
+    ds = xr.Dataset(
+        data_vars={"tp": (("percentile", "time", "latitude", "longitude"), tp)},
+        coords={
+            "percentile": percentile,
+            "time": time,
+            "latitude": latitude,
+            "longitude": longitude,
+        },
+    )
+
+    # Write the file to a temp location
+    tmpfile = tempfile.mkdtemp()
+    path_nc_oi_precip = tmpfile + "/subset_oi_tp.nc"
+    ds.to_netcdf(path_nc_oi_precip)
+
+    # Now prepare the data for the subset temperature grid.
+    # Coordinates
+    time = pd.date_range("1970-01-01", periods=15, freq="D")  # datetime64[ns]
+    longitude = np.round(-78.0 + 0.1 * np.arange(10), 1).astype(np.float64)  # -78.0 ... -77.1
+    latitude = np.round(50.0 - 0.1 * np.arange(10), 1).astype(np.float64)  # 50.0 ... 49.1
+
+    # Repeatable
+    rng = np.random.default_rng(12345)
+
+    shape_3d = (len(longitude), len(latitude), len(time))
+
+    # Base mean temperature field
+    tmean = rng.normal(loc=12.0, scale=6.0, size=shape_3d).astype(np.float32)
+
+    # Positive daily range
+    trange = rng.uniform(3.0, 12.0, size=shape_3d).astype(np.float32)
+    tasmin = (tmean - 0.5 * trange).astype(np.float32)
+    tasmax = (tmean + 0.5 * trange).astype(np.float32)
+
+    # Altitude (m), repeatable
+    altitude = rng.uniform(0, 1200, size=(len(longitude), len(latitude))).astype(np.float64)
+
+    # Build dataset
+    ds = xr.Dataset(
+        data_vars={
+            "tasmax": (("longitude", "latitude", "time"), tasmax),
+            "tasmin": (("longitude", "latitude", "time"), tasmin),
+            "altitude": (("longitude", "latitude"), altitude),
+        },
+        coords={
+            "longitude": longitude,
+            "latitude": latitude,
+            "time": time,
+        },
+    )
+
+    # Write the file to a temp location as well.
+    tmpfile_temperature = tempfile.mkdtemp()
+    path_nc_grid_temperature = tmpfile_temperature + "/subset_grid_temperature.nc"
+    ds.to_netcdf(path_nc_grid_temperature)
+
+    # Intermediate data for reordered stations
+    stn = np.arange(100, dtype=np.int64)
+    time = pd.date_range("1970-01-01", periods=15, freq="D")  # datetime64[ns]
+    # Build station lat/lon from a 10x10 grid (same values/ranges as the previous grid example)
+    lon_1d = np.round(-78.0 + 0.1 * np.arange(10), 1).astype(np.float64)
+    lat_1d = np.round(50.0 - 0.1 * np.arange(10), 1).astype(np.float64)
+
+    # IMPORTANT: indexing="ij" matches the grid dim order (longitude, latitude)
+    lon2d, lat2d = np.meshgrid(lon_1d, lat_1d, indexing="ij")  # both shape (10, 10)
+
+    # Flatten to stations (100 points). order="C" means latitude varies fastest within each longitude.
+    longitude = lon2d.ravel(order="C")
+    latitude = lat2d.ravel(order="C")
+
+    # Station IDs
+    stnid = stn.astype(np.int64)
+
+    # Repeatable data
+    rng = np.random.default_rng(12345)  # seed -> repeatable
+    shape = (len(stn), len(time))
+
+    # Precip
+    precip = rng.gamma(shape=2.0, scale=2.0, size=shape).astype(np.float64)
+
+    # Temperatures
+    tmean = rng.normal(loc=12.0, scale=6.0, size=shape).astype(np.float32)  # mean °C
+    trange = rng.uniform(3.0, 12.0, size=shape).astype(np.float32)  # daily range °C (>0)
+    tmin = (tmean - 0.5 * trange).astype(np.float32)
+    tmax = (tmean + 0.5 * trange).astype(np.float32)
+
+    # Build Dataset
+    ds_stn = xr.Dataset(
+        data_vars={
+            "precip": (("stn", "time"), precip),
+            "tmax": (("stn", "time"), tmax),
+            "tmin": (("stn", "time"), tmin),
+            "latitude": (("stn",), latitude),
+            "longitude": (("stn",), longitude),
+            "stnid": (("stn",), stnid),
+        },
+        coords={
+            "stn": stn,
+            "time": time,
+        },
+    )
+
+    # Write the file to a temp location as well.
+    tmpfile_reordered = tempfile.mkdtemp()
+    path_nc_grid_reordered = tmpfile_reordered + "/subset_reordered_grids_to_stations.nc"
+    ds_stn.to_netcdf(path_nc_grid_reordered)
+
+    # tempout folder
+    tempout = tempfile.mkdtemp()
+
+    def test_convert_2d_to_1d(self):
+        # Run the code
+        convert_2d_nc_to_1d_stations(
+            self.path_nc_oi_precip,
+            self.path_nc_grid_temperature,
+            self.tempout + "/subset_reordered_grids_to_stations.nc",
+        )
+
+        ds = xr.open_dataset(self.tempout + "/subset_reordered_grids_to_stations.nc")
+        assert_almost_equal(ds.precip.isel(stn=5, time=5).values, 0.2762543, 5)
+        assert_almost_equal(ds.tmax.isel(stn=6, time=5).values, 13.288241, 5)
+        assert ds.stn.shape[0] == 100
+        assert ds.stn[-1] == 99
+
+    def test_subsample_stations_for_pygmet(self):
+        # Take roughly 1 out of every ~10 stations
+        isel_every_about_n(
+            path_to_nc=self.path_nc_grid_reordered,
+            dim="stn",
+            outpath=self.tempout + "/subset_oi_vector_format_subsampled.nc",
+            rng=42,
+            threshold=0.1,
+            n=10,
+            jitter=3,
+        )
+
+        ds = xr.open_dataset(self.tempout + "/subset_oi_vector_format_subsampled.nc")
+        assert_almost_equal(ds.precip.isel(stn=5, time=5).values, 3.057068, 5)
+        assert_almost_equal(ds.tmax.isel(stn=6, time=5).values, 10.385146, 5)
+        assert ds.stn.shape[0] == 11
+        assert ds.stn[-1] == 96
+
+    def test_make_target_pygmet_grid(self):
+        make_target_pygmet_grid(
+            self.path_nc_grid_temperature,
+            self.tempout + "/new_grid_output_shape.nc",
+        )
+
+        ds = xr.open_dataset(self.tempout + "/new_grid_output_shape.nc")
+        assert ds.y[0] == 50
+        assert ds.mask[5, 5].values == 1
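Reviewer note: a possible follow-up assertion for `test_make_pygmet_config`, not part of this PR: round-trip the written file through the standard-library parser to confirm it is valid TOML (`tomllib` requires Python 3.11+; `outpath` is the variable built in that test):

import tomllib

with open(outpath, "rb") as f:  # outpath as built in test_make_pygmet_config
    cfg = tomllib.load(f)
assert cfg["case_name"] == "My_case_01"
assert cfg["ensemble_end"] == 100
assert cfg["clen"] == [150, 800, 800]  # codespell:ignore clen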