From ac858cdc4b8e0b6a86d70e9db4980934912b6e26 Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Mon, 2 Jun 2025 14:20:28 +0200 Subject: [PATCH 01/84] add a cross-sectional dgp --- doubleml/did/datasets/dgp_did_cs_CS2021.py | 190 +++++++++++++++++++++ 1 file changed, 190 insertions(+) create mode 100644 doubleml/did/datasets/dgp_did_cs_CS2021.py diff --git a/doubleml/did/datasets/dgp_did_cs_CS2021.py b/doubleml/did/datasets/dgp_did_cs_CS2021.py new file mode 100644 index 00000000..95119b94 --- /dev/null +++ b/doubleml/did/datasets/dgp_did_cs_CS2021.py @@ -0,0 +1,190 @@ +import numpy as np + +from doubleml.did.datasets.dgp_did_CS2021 import make_did_CS2021 + +# Based on https://doi.org/10.1016/j.jeconom.2020.12.001 (see Appendix SC) +# and https://d2cml-ai.github.io/csdid/examples/csdid_basic.html#Examples-with-simulated-data +# Cross-sectional version of the data generating process (DGP) for Callaway and Sant'Anna (2021) + + +def make_did_cs_CS2021(n_obs=1000, dgp_type=1, include_never_treated=True, lambda_t=0.5, time_type="datetime", **kwargs): + """ + Generate synthetic repeated cross-sectional data for difference-in-differences analysis based on + Callaway and Sant'Anna (2021). + + This function creates repeated cross-sectional data with heterogeneous treatment effects across time periods and groups. + The data includes pre-treatment periods, multiple treatment groups that receive treatment at different times, + and optionally a never-treated group that serves as a control. The true average treatment effect on the + treated (ATT) has a heterogeneous structure dependent on covariates and exposure time. + + The data generating process offers six variations (``dgp_type`` 1-6) that differ in how the regression features + and propensity score features are derived: + + - DGP 1: Outcome and propensity score are linear (in Z) + - DGP 2: Outcome is linear, propensity score is nonlinear + - DGP 3: Outcome is nonlinear, propensity score is linear + - DGP 4: Outcome and propensity score are nonlinear + - DGP 5: Outcome is linear, propensity score is constant (experimental setting) + - DGP 6: Outcome is nonlinear, propensity score is constant (experimental setting) + + Let :math:`X= (X_1, X_2, X_3, X_4)^T \\sim \\mathcal{N}(0, \\Sigma)`, where :math:`\\Sigma` is a matrix with entries + :math:`\\Sigma_{kj} = c^{|j-k|}`. The default value is :math:`c = 0`, corresponding to the identity matrix. + + Further, define :math:`Z_j = (\\tilde{Z_j} - \\mathbb{E}[\\tilde{Z}_j]) / \\sqrt{\\text{Var}(\\tilde{Z}_j)}`, + where :math:`\\tilde{Z}_1 = \\exp(0.5 \\cdot X_1)`, :math:`\\tilde{Z}_2 = 10 + X_2/(1 + \\exp(X_1))`, + :math:`\\tilde{Z}_3 = (0.6 + X_1 \\cdot X_3 / 25)^3` and :math:`\\tilde{Z}_4 = (20 + X_2 + X_4)^2`. + + For a feature vector :math:`W=(W_1, W_2, W_3, W_4)^T` (either X or Z based on ``dgp_type``), the core functions are: + + 1. Time-varying outcome regression function for each time period :math:`t`: + + .. math:: + + f_{reg,t}(W) = 210 + \\frac{t}{T} \\cdot (27.4 \\cdot W_1 + 13.7 \\cdot W_2 + 13.7 \\cdot W_3 + 13.7 \\cdot W_4) + + 2. Group-specific propensity function for each treatment group :math:`g`: + + .. math:: + + f_{ps,g}(W) = \\xi \\cdot \\left(1-\\frac{g}{G}\\right) \\cdot + (-W_1 + 0.5 \\cdot W_2 - 0.25 \\cdot W_3 - 0.2\\cdot W_4) + + where :math:`T` is the number of time periods, :math:`G` is the number of treatment groups, and :math:`\\xi` is a + scale parameter (default: 0.9). + + The panel data model is defined with the following components: + + 1. 
Time effects: :math:`\\delta_t = t` for time period :math:`t`
+
+    2. Individual effects: :math:`\\eta_i \\sim \\mathcal{N}(g_i, 1)` where :math:`g_i` is unit :math:`i`'s treatment group
+
+    3. Treatment effects: For a unit in treatment group :math:`g`, the effect in period :math:`t` is:
+
+    .. math::
+
+        \\theta_{i,t,g} = \\max(t - t_g + 1, 0) + 0.1 \\cdot X_{i,1} \\cdot \\max(t - t_g + 1, 0)
+
+    where :math:`t_g` is the first treatment period for group :math:`g`, :math:`X_{i,1}` is the first covariate for unit
+    :math:`i`, and :math:`\\max(t - t_g + 1, 0)` represents the exposure time (0 for pre-treatment periods).
+
+    4. Potential outcomes for unit :math:`i` in period :math:`t`:
+
+    .. math::
+
+        Y_{i,t}(0) &= f_{reg,t}(W_{reg}) + \\delta_t + \\eta_i + \\varepsilon_{i,0,t}
+
+        Y_{i,t}(1) &= Y_{i,t}(0) + \\theta_{i,t,g} + (\\varepsilon_{i,1,t} - \\varepsilon_{i,0,t})
+
+    where :math:`\\varepsilon_{i,0,t}, \\varepsilon_{i,1,t} \\sim \\mathcal{N}(0, 1)`.
+
+    5. Observed outcomes:
+
+    .. math::
+
+        Y_{i,t} = Y_{i,t}(1) \\cdot 1\\{t \\geq t_g\\} + Y_{i,t}(0) \\cdot 1\\{t < t_g\\}
+
+    6. Treatment assignment:
+
+    For non-experimental settings (DGP 1-4), the probability of being in treatment group :math:`g` is:
+
+    .. math::
+
+        P(G_i = g) = \\frac{\\exp(f_{ps,g}(W_{ps}))}{\\sum_{g'} \\exp(f_{ps,g'}(W_{ps}))}
+
+    For experimental settings (DGP 5-6), each treatment group (including never-treated) has equal probability:
+
+    .. math::
+
+        P(G_i = g) = \\frac{1}{G} \\text{ for all } g
+
+    7. Steps 1-6 generate panel data. To obtain repeated cross-sectional data, the number of generated individuals is
+    increased to `n_obs/lambda_t`, where `lambda_t` denotes the probability of observing a unit in a given time period
+    (constant over time). For each time period, units are then randomly selected to be observed with probability
+    `lambda_t`.
+
+    The variables :math:`W_{reg}` and :math:`W_{ps}` are selected based on the DGP type:
+
+    .. math::
+
+        DGP1:\\quad W_{reg} &= Z \\quad W_{ps} = Z
+
+        DGP2:\\quad W_{reg} &= Z \\quad W_{ps} = X
+
+        DGP3:\\quad W_{reg} &= X \\quad W_{ps} = Z
+
+        DGP4:\\quad W_{reg} &= X \\quad W_{ps} = X
+
+        DGP5:\\quad W_{reg} &= Z \\quad W_{ps} = 0
+
+        DGP6:\\quad W_{reg} &= X \\quad W_{ps} = 0
+
+    where settings 5-6 correspond to experimental designs with equal probability across treatment groups.
+
+
+    Parameters
+    ----------
+    n_obs : int, default=1000
+        The number of observations to simulate.
+
+    dgp_type : int, default=1
+        The data generating process to be used (1-6).
+
+    include_never_treated : bool, default=True
+        Whether to include units that are never treated.
+
+    lambda_t : float, default=0.5
+        Probability of observing a unit at each time period.
+
+    time_type : str, default="datetime"
+        Type of time variable. Either "datetime" or "float".
+
+    **kwargs
+        Additional keyword arguments. Accepts the following parameters:
+
+        `c` (float, default=0.0):
+            Parameter for correlation structure in X.
+
+        `dim_x` (int, default=4):
+            Dimension of feature vectors.
+
+        `xi` (float, default=0.9):
+            Scale parameter for the propensity score function.
+
+        `n_periods` (int, default=5):
+            Number of time periods.
+
+        `anticipation_periods` (int, default=0):
+            Number of periods before treatment where anticipation effects occur.
+
+        `n_pre_treat_periods` (int, default=2):
+            Number of pre-treatment periods.
+
+        `start_date` (str, default="2025-01"):
+            Start date for datetime time variables.
+
+    Returns
+    -------
+    pandas.DataFrame
+        DataFrame containing the simulated repeated cross-sectional data.
+
+    References
+    ----------
+    Callaway, B. and Sant’Anna, P. H.
(2021), + Difference-in-Differences with multiple time periods. Journal of Econometrics, 225(2), 200-230. + doi:`10.1016/j.jeconom.2020.12.001 `_. + """ + + n_obs_panel = int(np.ceil(n_obs / lambda_t)) + df_panel = make_did_CS2021( + n_obs=n_obs_panel, + dgp_type=dgp_type, + include_never_treated=include_never_treated, + time_type=time_type, + **kwargs, + ) + + # for each time period, randomly select units to observe + observed_units = np.random.binomial(1, lambda_t, size=(len(df_panel.index))) + df_repeated_cs = df_panel[observed_units == 1].copy() + + return df_repeated_cs From 10e532e79600cced091cf471c729269ba7b7b983 Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Mon, 2 Jun 2025 14:21:04 +0200 Subject: [PATCH 02/84] add simple test cases for cross sectional dgp --- doubleml/did/datasets/__init__.py | 2 ++ doubleml/did/tests/test_datasets.py | 54 ++++++++++++++++++++++++++++- 2 files changed, 55 insertions(+), 1 deletion(-) diff --git a/doubleml/did/datasets/__init__.py b/doubleml/did/datasets/__init__.py index aaa5fc0a..306e7b10 100644 --- a/doubleml/did/datasets/__init__.py +++ b/doubleml/did/datasets/__init__.py @@ -3,9 +3,11 @@ """ from .dgp_did_CS2021 import make_did_CS2021 +from .dgp_did_cs_CS2021 import make_did_cs_CS2021 from .dgp_did_SZ2020 import make_did_SZ2020 __all__ = [ "make_did_SZ2020", "make_did_CS2021", + "make_did_cs_CS2021", ] diff --git a/doubleml/did/tests/test_datasets.py b/doubleml/did/tests/test_datasets.py index 0e323ec9..54eb4074 100644 --- a/doubleml/did/tests/test_datasets.py +++ b/doubleml/did/tests/test_datasets.py @@ -3,7 +3,7 @@ import pytest from doubleml import DoubleMLData -from doubleml.did.datasets import make_did_CS2021, make_did_SZ2020 +from doubleml.did.datasets import make_did_CS2021, make_did_cs_CS2021, make_did_SZ2020 msg_inv_return_type = "Invalid return_type." @@ -77,3 +77,55 @@ def test_make_did_CS2021_exceptions(): msg = r"time_type must be one of \('datetime', 'float'\). Got 2." 
with pytest.raises(ValueError, match=msg): _ = make_did_CS2021(n_obs=100, time_type=2) + + +@pytest.fixture(scope="function", params=[0.5, 0.1]) +def lambda_t(request): + return request.param + + +@pytest.mark.ci +def test_make_did_cs_CS2021_return_types(dgp_type, include_never_treated, lambda_t, time_type, anticipation_periods): + np.random.seed(3141) + df = make_did_cs_CS2021( + n_obs=100, + dgp_type=dgp_type, + include_never_treated=include_never_treated, + lambda_t=lambda_t, + time_type=time_type, + anticipation_periods=anticipation_periods, + ) + assert isinstance(df, pd.DataFrame) + + +@pytest.mark.ci +def test_panel_vs_cs_make_did_CS2021(dgp_type, include_never_treated, time_type, anticipation_periods): + np.random.seed(3141) + df_cs = make_did_cs_CS2021( + n_obs=100, + dgp_type=dgp_type, + include_never_treated=include_never_treated, + lambda_t=1.0, + time_type=time_type, + anticipation_periods=anticipation_periods, + ) + + np.random.seed(3141) + df_panel = make_did_CS2021( + n_obs=100, + dgp_type=dgp_type, + include_never_treated=include_never_treated, + time_type=time_type, + anticipation_periods=anticipation_periods, + ) + + # check if df_cs close to df_panel + assert df_cs.shape[0] == df_panel.shape[0] + # Select numerical columns + df_cs_numeric = df_cs.select_dtypes(include=np.number) + df_panel_numeric = df_panel.select_dtypes(include=np.number) + + # Ensure the same numerical columns are being compared, in the same order + pd.testing.assert_index_equal(df_cs_numeric.columns, df_panel_numeric.columns) + + assert np.allclose(df_cs_numeric.values, df_panel_numeric.values, atol=1e-5, rtol=1e-5) From c96605d28b6628c7d1bcf32c8fee0e0f8609b171 Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Tue, 3 Jun 2025 13:28:48 +0200 Subject: [PATCH 03/84] reset index for in panel data --- doubleml/data/panel_data.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doubleml/data/panel_data.py b/doubleml/data/panel_data.py index f548ae6a..4e416183 100644 --- a/doubleml/data/panel_data.py +++ b/doubleml/data/panel_data.py @@ -106,6 +106,8 @@ def __init__( force_all_x_finite=force_all_x_finite, force_all_d_finite=False, ) + # reset index to ensure a simple RangeIndex + self.data.reset_index(drop=True, inplace=True) if self.n_treat != 1: raise ValueError("Only one treatment column is allowed for panel data.") From 61dbf11470ca1f97be57196ac8c2b03e83ed94f6 Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Tue, 3 Jun 2025 13:29:30 +0200 Subject: [PATCH 04/84] add basic did_cs_binary version with simple tests --- doubleml/did/__init__.py | 2 + doubleml/did/did_cs_binary.py | 592 ++++++++++++++++++ ...test_did_cs_binary_external_predictions.py | 92 +++ ...test_did_cs_binary_vs_did_cs_two_period.py | 163 +++++ 4 files changed, 849 insertions(+) create mode 100644 doubleml/did/did_cs_binary.py create mode 100644 doubleml/did/tests/test_did_cs_binary_external_predictions.py create mode 100644 doubleml/did/tests/test_did_cs_binary_vs_did_cs_two_period.py diff --git a/doubleml/did/__init__.py b/doubleml/did/__init__.py index 354ffaa5..369353ef 100644 --- a/doubleml/did/__init__.py +++ b/doubleml/did/__init__.py @@ -6,6 +6,7 @@ from .did_aggregation import DoubleMLDIDAggregation from .did_binary import DoubleMLDIDBinary from .did_cs import DoubleMLDIDCS +from .did_cs_binary import DoubleMLDIDCSBinary from .did_multi import DoubleMLDIDMulti __all__ = [ @@ -13,5 +14,6 @@ "DoubleMLDID", "DoubleMLDIDCS", "DoubleMLDIDBinary", + "DoubleMLDIDCSBinary", "DoubleMLDIDMulti", ] diff --git 
a/doubleml/did/did_cs_binary.py b/doubleml/did/did_cs_binary.py new file mode 100644 index 00000000..ce57384c --- /dev/null +++ b/doubleml/did/did_cs_binary.py @@ -0,0 +1,592 @@ +import warnings + +import numpy as np +from sklearn.utils import check_X_y + +from doubleml.data.panel_data import DoubleMLPanelData +from doubleml.did.utils._did_utils import ( + _check_anticipation_periods, + _check_control_group, + _check_gt_combination, + _check_gt_values, + _get_id_positions, + _get_never_treated_value, + _is_never_treated, + _set_id_positions, +) +from doubleml.double_ml import DoubleML +from doubleml.double_ml_score_mixins import LinearScoreMixin +from doubleml.utils._checks import ( + _check_bool, + _check_finite_predictions, + _check_is_propensity, + _check_score, + _check_trimming, +) +from doubleml.utils._estimation import _dml_cv_predict, _get_cond_smpls_2d +from doubleml.utils._propensity_score import _trimm + + +class DoubleMLDIDCSBinary(LinearScoreMixin, DoubleML): + + def __init__( + self, + obj_dml_data, + g_value, + t_value_pre, + t_value_eval, + ml_g, + ml_m=None, + control_group="never_treated", + anticipation_periods=0, + n_folds=5, + n_rep=1, + score="observational", + in_sample_normalization=True, + trimming_rule="truncate", + trimming_threshold=1e-2, + draw_sample_splitting=True, + print_periods=False, + ): + super().__init__(obj_dml_data, n_folds, n_rep, score, draw_sample_splitting=False) + + self._check_data(self._dml_data) + g_values = self._dml_data.g_values + t_values = self._dml_data.t_values + + _check_bool(print_periods, "print_periods") + self._print_periods = print_periods + self._control_group = _check_control_group(control_group) + self._never_treated_value = _get_never_treated_value(g_values) + self._anticipation_periods = _check_anticipation_periods(anticipation_periods) + + _check_gt_combination( + (g_value, t_value_pre, t_value_eval), g_values, t_values, self.never_treated_value, self.anticipation_periods + ) + self._g_value = g_value + self._t_value_pre = t_value_pre + self._t_value_eval = t_value_eval + + # check if post_treatment evaluation + if g_value <= t_value_eval: + post_treatment = True + else: + post_treatment = False + + self._post_treatment = post_treatment + + if self._print_periods: + print( + f"Evaluation of ATT({g_value}, {t_value_eval}), with pre-treatment period {t_value_pre},\n" + + f"post-treatment: {post_treatment}. Control group: {control_group}.\n" + ) + + # Preprocess data + self._data_subset = self._preprocess_data(self._g_value, self._t_value_pre, self._t_value_eval) + + # Handling id values to match pairwise evaluation & simultaneous inference + if not np.all(np.isin(self.data_subset.index, self._dml_data.data.index)): + raise ValueError("The index values in the data subset are not a subset of the original index values.") + + # Find position of data subset in original data + # These entries should be replaced by nuisance predictions, all others should be set to 0. 
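+        # The stored positions are later used by _set_id_positions() to scatter the
+        # subset-level predictions and score elements back into arrays with one entry per
+        # row of the full data set (score elements padded with 0.0, predictions and
+        # targets with NaN).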
+ self._id_positions = self.data_subset.index + + # Numeric values for positions of the entries in id_panel_data inside id_original + # np.nonzero(np.isin(id_original, id_panel_data)) + self._n_subset = self.data_subset.shape[0] + self._n_obs = self._n_subset # Effective sample size used for resampling + + # Save x and y for later ML estimation + self._x_data = self.data_subset.loc[:, self._dml_data.x_cols].values + self._y_data = self.data_subset.loc[:, self._dml_data.y_col].values + self._g_data = self.data_subset.loc[:, "G_indicator"].values + self._t_data = self.data_subset.loc[:, "t_indicator"].values + + valid_scores = ["observational", "experimental"] + _check_score(self.score, valid_scores, allow_callable=False) + + self._in_sample_normalization = in_sample_normalization + if not isinstance(self.in_sample_normalization, bool): + raise TypeError( + "in_sample_normalization indicator has to be boolean. " + + f"Object of type {str(type(self.in_sample_normalization))} passed." + ) + + # set stratication for resampling + self._strata = self.data_subset["G_indicator"] + 2 * self.data_subset["t_indicator"] + if draw_sample_splitting: + self.draw_sample_splitting() + + # check learners + ml_g_is_classifier = self._check_learner(ml_g, "ml_g", regressor=True, classifier=True) + if self.score == "observational": + _ = self._check_learner(ml_m, "ml_m", regressor=False, classifier=True) + self._learner = {"ml_g": ml_g, "ml_m": ml_m} + else: + assert self.score == "experimental" + if ml_m is not None: + warnings.warn( + ( + 'A learner ml_m has been provided for score = "experimental" but will be ignored. ' + "A learner ml_m is not required for estimation." + ) + ) + self._learner = {"ml_g": ml_g} + + if ml_g_is_classifier: + if obj_dml_data.binary_outcome: + self._predict_method = {"ml_g": "predict_proba"} + else: + raise ValueError( + f"The ml_g learner {str(ml_g)} was identified as classifier " + "but the outcome variable is not binary with values 0 and 1." + ) + else: + self._predict_method = {"ml_g": "predict"} + + if "ml_m" in self._learner: + self._predict_method["ml_m"] = "predict_proba" + self._initialize_ml_nuisance_params() + + self._trimming_rule = trimming_rule + self._trimming_threshold = trimming_threshold + _check_trimming(self._trimming_rule, self._trimming_threshold) + + self._sensitivity_implemented = False + self._external_predictions_implemented = True + + @property + def g_value(self): + """ + The value indicating the treatment group (first period with treatment). + """ + return self._g_value + + @property + def t_value_eval(self): + """ + The value indicating the evaluation period. + """ + return self._t_value_eval + + @property + def t_value_pre(self): + """ + The value indicating the pre-treatment period. + """ + return self._t_value_pre + + @property + def never_treated_value(self): + """ + The value indicating that a unit was never treated. + """ + return self._never_treated_value + + @property + def post_treatment(self): + """ + Indicates whether the evaluation period is after the treatment period. + """ + return self._post_treatment + + @property + def control_group(self): + """ + The control group. + """ + return self._control_group + + @property + def anticipation_periods(self): + """ + The number of anticipation periods. + """ + return self._anticipation_periods + + @property + def data_subset(self): + """ + The preprocessed data subset. 
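+        Contains the rows of the original data belonging to the pre-treatment and evaluation
+        periods for the treatment group (``G_indicator == 1``) and the selected control group
+        (``C_indicator == 1``), together with a ``t_indicator`` column marking the evaluation period.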
+ """ + return self._data_subset + + @property + def id_positions(self): + """ + The positions of the id values in the original data. + """ + return self._id_positions + + @property + def in_sample_normalization(self): + """ + Indicates whether the in sample normalization of weights are used. + """ + return self._in_sample_normalization + + @property + def trimming_rule(self): + """ + Specifies the used trimming rule. + """ + return self._trimming_rule + + @property + def trimming_threshold(self): + """ + Specifies the used trimming threshold. + """ + return self._trimming_threshold + + @property + def n_obs(self): + """ + The number of observations used for estimation. + """ + return self._n_subset + + def _initialize_ml_nuisance_params(self): + if self.score == "observational": + valid_learner = ["ml_g_d0_t0", "ml_g_d0_t1", "ml_g_d1_t0", "ml_g_d1_t1", "ml_m"] + else: + assert self.score == "experimental" + valid_learner = ["ml_g_d0_t0", "ml_g_d0_t1", "ml_g_d1_t0", "ml_g_d1_t1"] + self._params = {learner: {key: [None] * self.n_rep for key in self._dml_data.d_cols} for learner in valid_learner} + + def _check_data(self, obj_dml_data): + if not isinstance(obj_dml_data, DoubleMLPanelData): + raise TypeError( + "For repeated outcomes the data must be of DoubleMLPanelData type. " + f"{str(obj_dml_data)} of type {str(type(obj_dml_data))} was passed." + ) + if obj_dml_data.z_cols is not None: + raise NotImplementedError( + "Incompatible data. " + " and ".join(obj_dml_data.z_cols) + " have been set as instrumental variable(s). " + "At the moment there are not DiD models with instruments implemented." + ) + + one_treat = obj_dml_data.n_treat == 1 + if not (one_treat): + raise ValueError( + "Incompatible data. " + "To fit an DID model with DML " + "exactly one variable needs to be specified as treatment variable." 
+ ) + _check_gt_values(obj_dml_data.g_values, obj_dml_data.t_values) + return + + def _preprocess_data(self, g_value, pre_t, eval_t): + data = self._dml_data.data + + t_col = self._dml_data.t_col + id_col = self._dml_data.id_col + g_col = self._dml_data.g_col + + # relevant data subset + data_subset_indicator = data[t_col].isin([pre_t, eval_t]) + data_subset = data[data_subset_indicator].sort_values(by=[id_col, t_col]) + + # Construct G (treatment group) indicating treatment period in g + G_indicator = (data_subset[g_col] == g_value).astype(int) + + # Construct C (control group) indicating never treated or not yet treated + never_treated = _is_never_treated(data_subset[g_col], self.never_treated_value).reshape(-1) + if self.control_group == "never_treated": + C_indicator = never_treated.astype(int) + + elif self.control_group == "not_yet_treated": + # adjust max_g_value for anticipation periods + t_values = self._dml_data.t_values + max_g_value = t_values[min(np.where(t_values == eval_t)[0][0] + self.anticipation_periods, len(t_values) - 1)] + # not in G just as a additional check + later_treated = (data_subset[g_col] > max_g_value) & (G_indicator == 0) + not_yet_treated = never_treated | later_treated + C_indicator = not_yet_treated.astype(int) + + if np.sum(C_indicator) == 0: + raise ValueError("No observations in the control group.") + + data_subset = data_subset.assign(C_indicator=C_indicator, G_indicator=G_indicator) + # reduce to relevant subset + data_subset = data_subset[(data_subset["G_indicator"] == 1) | (data_subset["C_indicator"] == 1)] + # check if G and C are disjoint + assert sum(G_indicator & C_indicator) == 0 + + # add time indicator + data_subset = data_subset.assign(t_indicator=data_subset[t_col] == eval_t) + return data_subset + + def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False): + + # Here: d is a binary treatment indicator + x, y = check_X_y(X=self._x_data, y=self._y_data, force_all_finite=False) + _, d = check_X_y(x, self._g_data, force_all_finite=False) # (d is the G_indicator) + _, t = check_X_y(x, self._t_data, force_all_finite=False) + + # THIS DIFFERS FROM THE PAPER due to stratified splitting this should be the same for each fold + # nuisance estimates of the uncond. treatment prob. 
+ p_hat = np.full_like(d, d.mean(), dtype="float64") + lambda_hat = np.full_like(t, t.mean(), dtype="float64") + + # nuisance g + smpls_d0_t0, smpls_d0_t1, smpls_d1_t0, smpls_d1_t1 = _get_cond_smpls_2d(smpls, d, t) + + # nuisance g for d==0 & t==0 + if external_predictions["ml_g_d0_t0"] is not None: + ml_g_d0_t0_targets = np.full_like(y, np.nan, dtype="float64") + ml_g_d0_t0_targets[((d == 0) & (t == 0))] = y[((d == 0) & (t == 0))] + ml_d0_t0_pred = _get_id_positions(external_predictions["ml_g_d0_t0"], self.id_positions) + g_hat_d0_t0 = {"preds": ml_d0_t0_pred, "targets": ml_g_d0_t0_targets, "models": None} + else: + g_hat_d0_t0 = _dml_cv_predict( + self._learner["ml_g"], + x, + y, + smpls_d0_t0, + n_jobs=n_jobs_cv, + est_params=self._get_params("ml_g_d0_t0"), + method=self._predict_method["ml_g"], + return_models=return_models, + ) + + _check_finite_predictions(g_hat_d0_t0["preds"], self._learner["ml_g"], "ml_g", smpls) + # adjust target values to consider only compatible subsamples + g_hat_d0_t0["targets"] = g_hat_d0_t0["targets"].astype(float) + g_hat_d0_t0["targets"][np.invert((d == 0) & (t == 0))] = np.nan + + # nuisance g for d==0 & t==1 + if external_predictions["ml_g_d0_t1"] is not None: + ml_g_d0_t1_targets = np.full_like(y, np.nan, dtype="float64") + ml_g_d0_t1_targets[((d == 0) & (t == 1))] = y[((d == 0) & (t == 1))] + ml_d0_t1_pred = _get_id_positions(external_predictions["ml_g_d0_t1"], self.id_positions) + g_hat_d0_t1 = {"preds": ml_d0_t1_pred, "targets": ml_g_d0_t1_targets, "models": None} + else: + g_hat_d0_t1 = _dml_cv_predict( + self._learner["ml_g"], + x, + y, + smpls_d0_t1, + n_jobs=n_jobs_cv, + est_params=self._get_params("ml_g_d0_t1"), + method=self._predict_method["ml_g"], + return_models=return_models, + ) + + _check_finite_predictions(g_hat_d0_t1["preds"], self._learner["ml_g"], "ml_g", smpls) + # adjust target values to consider only compatible subsamples + g_hat_d0_t1["targets"] = g_hat_d0_t1["targets"].astype(float) + g_hat_d0_t1["targets"][np.invert((d == 0) & (t == 1))] = np.nan + + # nuisance g for d==1 & t==0 + if external_predictions["ml_g_d1_t0"] is not None: + ml_g_d1_t0_targets = np.full_like(y, np.nan, dtype="float64") + ml_g_d1_t0_targets[((d == 1) & (t == 0))] = y[((d == 1) & (t == 0))] + ml_d1_t0_pred = _get_id_positions(external_predictions["ml_g_d1_t0"], self.id_positions) + g_hat_d1_t0 = {"preds": ml_d1_t0_pred, "targets": ml_g_d1_t0_targets, "models": None} + else: + g_hat_d1_t0 = _dml_cv_predict( + self._learner["ml_g"], + x, + y, + smpls_d1_t0, + n_jobs=n_jobs_cv, + est_params=self._get_params("ml_g_d1_t0"), + method=self._predict_method["ml_g"], + return_models=return_models, + ) + + _check_finite_predictions(g_hat_d1_t0["preds"], self._learner["ml_g"], "ml_g", smpls) + # adjust target values to consider only compatible subsamples + g_hat_d1_t0["targets"] = g_hat_d1_t0["targets"].astype(float) + g_hat_d1_t0["targets"][np.invert((d == 1) & (t == 0))] = np.nan + + # nuisance g for d==1 & t==1 + if external_predictions["ml_g_d1_t1"] is not None: + ml_g_d1_t1_targets = np.full_like(y, np.nan, dtype="float64") + ml_g_d1_t1_targets[((d == 1) & (t == 1))] = y[((d == 1) & (t == 1))] + ml_d1_t1_pred = _get_id_positions(external_predictions["ml_g_d1_t1"], self.id_positions) + g_hat_d1_t1 = {"preds": ml_d1_t1_pred, "targets": ml_g_d1_t1_targets, "models": None} + else: + g_hat_d1_t1 = _dml_cv_predict( + self._learner["ml_g"], + x, + y, + smpls_d1_t1, + n_jobs=n_jobs_cv, + est_params=self._get_params("ml_g_d1_t1"), + method=self._predict_method["ml_g"], + 
return_models=return_models, + ) + + _check_finite_predictions(g_hat_d1_t1["preds"], self._learner["ml_g"], "ml_g", smpls) + # adjust target values to consider only compatible subsamples + g_hat_d1_t1["targets"] = g_hat_d1_t1["targets"].astype(float) + g_hat_d1_t1["targets"][np.invert((d == 1) & (t == 1))] = np.nan + + # only relevant for observational setting + m_hat = {"preds": None, "targets": None, "models": None} + if self.score == "observational": + # nuisance m + if external_predictions["ml_m"] is not None: + ml_m_pred = _get_id_positions(external_predictions["ml_m"], self.id_positions) + m_hat = {"preds": ml_m_pred, "targets": d, "models": None} + else: + m_hat = _dml_cv_predict( + self._learner["ml_m"], + x, + d, + smpls=smpls, + n_jobs=n_jobs_cv, + est_params=self._get_params("ml_m"), + method=self._predict_method["ml_m"], + return_models=return_models, + ) + + _check_finite_predictions(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls) + _check_is_propensity(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls, eps=1e-12) + m_hat["preds"] = _trimm(m_hat["preds"], self.trimming_rule, self.trimming_threshold) + + psi_a, psi_b = self._score_elements( + y, + d, + t, + g_hat_d0_t0["preds"], + g_hat_d0_t1["preds"], + g_hat_d1_t0["preds"], + g_hat_d1_t1["preds"], + m_hat["preds"], + p_hat, + lambda_hat, + ) + + extend_kwargs = { + "n_obs": self._dml_data.data.shape[0], + "id_positions": self.id_positions, + } + psi_elements = { + "psi_a": _set_id_positions(psi_a, fill_value=0.0, **extend_kwargs), + "psi_b": _set_id_positions(psi_b, fill_value=0.0, **extend_kwargs), + } + preds = { + "predictions": { + "ml_g_d0_t0": _set_id_positions(g_hat_d0_t0["preds"], fill_value=np.nan, **extend_kwargs), + "ml_g_d0_t1": _set_id_positions(g_hat_d0_t1["preds"], fill_value=np.nan, **extend_kwargs), + "ml_g_d1_t0": _set_id_positions(g_hat_d1_t0["preds"], fill_value=np.nan, **extend_kwargs), + "ml_g_d1_t1": _set_id_positions(g_hat_d1_t1["preds"], fill_value=np.nan, **extend_kwargs), + "ml_m": _set_id_positions(m_hat["preds"], fill_value=np.nan, **extend_kwargs), + }, + "targets": { + "ml_g_d0_t0": _set_id_positions(g_hat_d0_t0["targets"], fill_value=np.nan, **extend_kwargs), + "ml_g_d0_t1": _set_id_positions(g_hat_d0_t1["targets"], fill_value=np.nan, **extend_kwargs), + "ml_g_d1_t0": _set_id_positions(g_hat_d1_t0["targets"], fill_value=np.nan, **extend_kwargs), + "ml_g_d1_t1": _set_id_positions(g_hat_d1_t1["targets"], fill_value=np.nan, **extend_kwargs), + "ml_m": _set_id_positions(m_hat["targets"], fill_value=np.nan, **extend_kwargs), + }, + "models": { + "ml_g_d0_t0": g_hat_d0_t0["models"], + "ml_g_d0_t1": g_hat_d0_t1["models"], + "ml_g_d1_t0": g_hat_d1_t0["models"], + "ml_g_d1_t1": g_hat_d1_t1["models"], + "ml_m": m_hat["models"], + }, + } + + return psi_elements, preds + + def _score_elements(self, y, d, t, g_hat_d0_t0, g_hat_d0_t1, g_hat_d1_t0, g_hat_d1_t1, m_hat, p_hat, lambda_hat): + # calculate residuals + resid_d0_t0 = y - g_hat_d0_t0 + resid_d0_t1 = y - g_hat_d0_t1 + resid_d1_t0 = y - g_hat_d1_t0 + resid_d1_t1 = y - g_hat_d1_t1 + + d1t1 = np.multiply(d, t) + d1t0 = np.multiply(d, 1.0 - t) + d0t1 = np.multiply(1.0 - d, t) + d0t0 = np.multiply(1.0 - d, 1.0 - t) + + if self.score == "observational": + if self.in_sample_normalization: + weight_psi_a = np.divide(d, np.mean(d)) + weight_g_d1_t1 = weight_psi_a + weight_g_d1_t0 = -1.0 * weight_psi_a + weight_g_d0_t1 = -1.0 * weight_psi_a + weight_g_d0_t0 = weight_psi_a + + weight_resid_d1_t1 = np.divide(d1t1, np.mean(d1t1)) + weight_resid_d1_t0 = 
-1.0 * np.divide(d1t0, np.mean(d1t0)) + + prop_weighting = np.divide(m_hat, 1.0 - m_hat) + unscaled_d0_t1 = np.multiply(d0t1, prop_weighting) + weight_resid_d0_t1 = -1.0 * np.divide(unscaled_d0_t1, np.mean(unscaled_d0_t1)) + + unscaled_d0_t0 = np.multiply(d0t0, prop_weighting) + weight_resid_d0_t0 = np.divide(unscaled_d0_t0, np.mean(unscaled_d0_t0)) + else: + weight_psi_a = np.divide(d, p_hat) + weight_g_d1_t1 = weight_psi_a + weight_g_d1_t0 = -1.0 * weight_psi_a + weight_g_d0_t1 = -1.0 * weight_psi_a + weight_g_d0_t0 = weight_psi_a + + weight_resid_d1_t1 = np.divide(d1t1, np.multiply(p_hat, lambda_hat)) + weight_resid_d1_t0 = -1.0 * np.divide(d1t0, np.multiply(p_hat, 1.0 - lambda_hat)) + + prop_weighting = np.divide(m_hat, 1.0 - m_hat) + weight_resid_d0_t1 = -1.0 * np.multiply(np.divide(d0t1, np.multiply(p_hat, lambda_hat)), prop_weighting) + weight_resid_d0_t0 = np.multiply(np.divide(d0t0, np.multiply(p_hat, 1.0 - lambda_hat)), prop_weighting) + else: + assert self.score == "experimental" + if self.in_sample_normalization: + weight_psi_a = np.ones_like(y) + weight_g_d1_t1 = weight_psi_a + weight_g_d1_t0 = -1.0 * weight_psi_a + weight_g_d0_t1 = -1.0 * weight_psi_a + weight_g_d0_t0 = weight_psi_a + + weight_resid_d1_t1 = np.divide(d1t1, np.mean(d1t1)) + weight_resid_d1_t0 = -1.0 * np.divide(d1t0, np.mean(d1t0)) + weight_resid_d0_t1 = -1.0 * np.divide(d0t1, np.mean(d0t1)) + weight_resid_d0_t0 = np.divide(d0t0, np.mean(d0t0)) + else: + weight_psi_a = np.ones_like(y) + weight_g_d1_t1 = weight_psi_a + weight_g_d1_t0 = -1.0 * weight_psi_a + weight_g_d0_t1 = -1.0 * weight_psi_a + weight_g_d0_t0 = weight_psi_a + + weight_resid_d1_t1 = np.divide(d1t1, np.multiply(p_hat, lambda_hat)) + weight_resid_d1_t0 = -1.0 * np.divide(d1t0, np.multiply(p_hat, 1.0 - lambda_hat)) + weight_resid_d0_t1 = -1.0 * np.divide(d0t1, np.multiply(1.0 - p_hat, lambda_hat)) + weight_resid_d0_t0 = np.divide(d0t0, np.multiply(1.0 - p_hat, 1.0 - lambda_hat)) + + # set score elements + psi_a = -1.0 * weight_psi_a + + # psi_b + psi_b_1 = ( + np.multiply(weight_g_d1_t1, g_hat_d1_t1) + + np.multiply(weight_g_d1_t0, g_hat_d1_t0) + + np.multiply(weight_g_d0_t0, g_hat_d0_t0) + + np.multiply(weight_g_d0_t1, g_hat_d0_t1) + ) + psi_b_2 = ( + np.multiply(weight_resid_d1_t1, resid_d1_t1) + + np.multiply(weight_resid_d1_t0, resid_d1_t0) + + np.multiply(weight_resid_d0_t0, resid_d0_t0) + + np.multiply(weight_resid_d0_t1, resid_d0_t1) + ) + + psi_b = psi_b_1 + psi_b_2 + + return psi_a, psi_b + + def _nuisance_tuning( + self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search + ): + pass + + def _sensitivity_element_est(self, preds): + pass diff --git a/doubleml/did/tests/test_did_cs_binary_external_predictions.py b/doubleml/did/tests/test_did_cs_binary_external_predictions.py new file mode 100644 index 00000000..4e09dfe0 --- /dev/null +++ b/doubleml/did/tests/test_did_cs_binary_external_predictions.py @@ -0,0 +1,92 @@ +import math + +import numpy as np +import pytest +from sklearn.linear_model import LinearRegression, LogisticRegression + +from doubleml.did import DoubleMLDIDCSBinary +from doubleml.did.datasets import make_did_SZ2020 +from doubleml.tests._utils import draw_smpls +from doubleml.utils import DMLDummyClassifier, DMLDummyRegressor + + +@pytest.fixture(scope="module", params=["observational", "experimental"]) +def did_score(request): + return request.param + + +@pytest.fixture(scope="module", params=[1, 3]) +def n_rep(request): + return request.param + + 
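+# The fixture below fits DoubleMLDIDCSBinary with actual learners, stores the cross-fitted
+# nuisance predictions, and then refits the model with dummy learners while supplying those
+# predictions through `external_predictions`. The subsequent tests check that coefficients,
+# standard errors, scores and nuisance losses agree between the two fits.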
+@pytest.fixture(scope="module") +def doubleml_did_cs_fixture(did_score, n_rep): + n_obs = 500 + n_folds = 5 + + ext_predictions = {"d": {}} + dml_data = make_did_SZ2020(n_obs=n_obs, return_type="DoubleMLPanelData") + + kwargs = { + "obj_dml_data": dml_data, + "g_value": 1, + "t_value_pre": 0, + "t_value_eval": 1, + "score": did_score, + "n_rep": n_rep, + "draw_sample_splitting": False, + } + + dml_did = DoubleMLDIDCSBinary(ml_g=LinearRegression(), ml_m=LogisticRegression(), **kwargs) + strata = dml_did.data_subset["G_indicator"] + 2 * dml_did.data_subset["t_indicator"] + all_smpls = draw_smpls(2 * n_obs, n_folds, n_rep=n_rep, groups=strata) + dml_did.set_sample_splitting(all_smpls) + + np.random.seed(3141) + dml_did.fit(store_predictions=True) + + all_keys = ["ml_g_d0_t0", "ml_g_d0_t1", "ml_g_d1_t0", "ml_g_d1_t1"] + for key in all_keys: + ext_predictions["d"][key] = dml_did.predictions[key][:, :, 0] + if did_score == "observational": + ext_predictions["d"]["ml_m"] = dml_did.predictions["ml_m"][:, :, 0] + + dml_did_ext = DoubleMLDIDCSBinary(ml_g=DMLDummyRegressor(), ml_m=DMLDummyClassifier(), **kwargs) + dml_did_ext.set_sample_splitting(all_smpls) + np.random.seed(3141) + dml_did_ext.fit(external_predictions=ext_predictions) + + res_dict = { + "coef": dml_did.coef[0], + "coef_ext": dml_did_ext.coef[0], + "se": dml_did.se[0], + "se_ext": dml_did_ext.se[0], + "score": dml_did.psi, + "score_ext": dml_did_ext.psi, + "dml_did_nuisance_loss": dml_did.nuisance_loss, + "dml_did_ext_nuisance_loss": dml_did_ext.nuisance_loss, + } + + return res_dict + + +@pytest.mark.ci +def test_coef(doubleml_did_cs_fixture): + assert math.isclose(doubleml_did_cs_fixture["coef"], doubleml_did_cs_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-3) + + +@pytest.mark.ci +def test_se(doubleml_did_cs_fixture): + assert math.isclose(doubleml_did_cs_fixture["se"], doubleml_did_cs_fixture["se_ext"], rel_tol=1e-9, abs_tol=1e-3) + + +@pytest.mark.ci +def test_score(doubleml_did_cs_fixture): + assert np.allclose(doubleml_did_cs_fixture["score"], doubleml_did_cs_fixture["score_ext"], rtol=1e-9, atol=1e-3) + + +@pytest.mark.ci +def test_nuisance_loss(doubleml_did_cs_fixture): + for key, value in doubleml_did_cs_fixture["dml_did_nuisance_loss"].items(): + assert np.allclose(value, doubleml_did_cs_fixture["dml_did_ext_nuisance_loss"][key], rtol=1e-9, atol=1e-3) diff --git a/doubleml/did/tests/test_did_cs_binary_vs_did_cs_two_period.py b/doubleml/did/tests/test_did_cs_binary_vs_did_cs_two_period.py new file mode 100644 index 00000000..2c8c34f3 --- /dev/null +++ b/doubleml/did/tests/test_did_cs_binary_vs_did_cs_two_period.py @@ -0,0 +1,163 @@ +import math + +import numpy as np +import pytest +from sklearn.base import clone +from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor +from sklearn.linear_model import LinearRegression, LogisticRegression + +import doubleml as dml + +from ...tests._utils import draw_smpls +from ._utils_did_cs_manual import fit_did_cs +from ._utils_did_manual import boot_did + + +@pytest.fixture( + scope="module", + params=[ + [LinearRegression(), LogisticRegression(solver="lbfgs", max_iter=250)], + [ + RandomForestRegressor(max_depth=5, n_estimators=10, random_state=42), + RandomForestClassifier(max_depth=5, n_estimators=10, random_state=42), + ], + ], +) +def learner(request): + return request.param + + +@pytest.fixture(scope="module", params=["observational", "experimental"]) +def score(request): + return request.param + + +@pytest.fixture(scope="module", params=[True, False]) +def 
in_sample_normalization(request): + return request.param + + +@pytest.fixture(scope="module", params=[0.1]) +def trimming_threshold(request): + return request.param + + +@pytest.fixture(scope="module") +def dml_did_cs_binary_vs_did_cs_fixture(generate_data_did_binary, learner, score, in_sample_normalization, trimming_threshold): + boot_methods = ["normal"] + n_folds = 2 + n_rep_boot = 499 + + # collect data + dml_panel_data = generate_data_did_binary + df = dml_panel_data._data.sort_values(by=["id", "t"]) + + n_obs = df.shape[0] + all_smpls = draw_smpls(n_obs, n_folds) + obj_dml_data = dml.DoubleMLData(df, y_col="y", d_cols="d", t_col="t", x_cols=["Z1", "Z2", "Z3", "Z4"]) + + # Set machine learning methods for m & g + ml_g = clone(learner[0]) + ml_m = clone(learner[1]) + + dml_args = { + "ml_g": ml_g, + "ml_m": ml_m, + "n_folds": n_folds, + "score": score, + "in_sample_normalization": in_sample_normalization, + "trimming_threshold": trimming_threshold, + "draw_sample_splitting": False, + } + + dml_did_binary_obj = dml.did.DoubleMLDIDCSBinary( + dml_panel_data, + g_value=1, + t_value_pre=0, + t_value_eval=1, + **dml_args, + ) + + dml_did_obj = dml.DoubleMLDIDCS( + obj_dml_data, + **dml_args, + ) + + # synchronize the sample splitting + dml_did_obj.set_sample_splitting(all_smpls=all_smpls) + dml_did_binary_obj.set_sample_splitting(all_smpls=all_smpls) + + dml_did_obj.fit() + dml_did_binary_obj.fit() + + # manual fit + y = df["y"].values + d = df["d"].values + x = df[["Z1", "Z2", "Z3", "Z4"]].values + t = df["t"].values + + np.random.seed(3141) + res_manual = fit_did_cs( + y, + x, + d, + t, + clone(learner[0]), + clone(learner[1]), + all_smpls, + score, + in_sample_normalization, + trimming_threshold=trimming_threshold, + ) + + res_dict = { + "coef": dml_did_obj.coef, + "coef_binary": dml_did_binary_obj.coef, + "coef_manual": res_manual["theta"], + "se": dml_did_obj.se, + "se_binary": dml_did_binary_obj.se, + "se_manual": res_manual["se"], + "nuisance_loss": dml_did_obj.nuisance_loss, + "nuisance_loss_binary": dml_did_binary_obj.nuisance_loss, + "boot_methods": boot_methods, + } + + for bootstrap in boot_methods: + np.random.seed(3141) + boot_t_stat = boot_did( + y, + res_manual["thetas"], + res_manual["ses"], + res_manual["all_psi_a"], + res_manual["all_psi_b"], + all_smpls, + bootstrap, + n_rep_boot, + ) + + np.random.seed(3141) + dml_did_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot) + np.random.seed(3141) + dml_did_binary_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot) + + res_dict["boot_t_stat" + bootstrap] = dml_did_obj.boot_t_stat + res_dict["boot_t_stat" + bootstrap + "_binary"] = dml_did_binary_obj.boot_t_stat + res_dict["boot_t_stat" + bootstrap + "_manual"] = boot_t_stat.reshape(-1, 1, 1) + + return res_dict + + +@pytest.mark.ci +def test_coefs(dml_did_cs_binary_vs_did_cs_fixture): + assert math.isclose( + dml_did_cs_binary_vs_did_cs_fixture["coef"][0], + dml_did_cs_binary_vs_did_cs_fixture["coef_manual"], + rel_tol=1e-9, + abs_tol=1e-4, + ) + assert math.isclose( + dml_did_cs_binary_vs_did_cs_fixture["coef_binary"][0], + dml_did_cs_binary_vs_did_cs_fixture["coef"][0], + rel_tol=1e-9, + abs_tol=1e-4, + ) From ceebc6ee5d462016f8ddaab3e8d0c2f9325665be Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Tue, 3 Jun 2025 14:19:28 +0200 Subject: [PATCH 05/84] add internal atribute _score_dim to DoubleML class --- doubleml/double_ml.py | 33 ++++++++++++++------------------- 1 file changed, 14 insertions(+), 19 deletions(-) diff --git a/doubleml/double_ml.py 
b/doubleml/double_ml.py index 764865a4..0ab80cfa 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -101,6 +101,7 @@ def __init__(self, obj_dml_data, n_folds, n_rep, score, draw_sample_splitting): if draw_sample_splitting: self.draw_sample_splitting() + self._score_dim = (self._dml_data.n_obs, self.n_rep, self._dml_data.n_coefs) # initialize arrays according to obj_dml_data and the resampling settings ( self._psi, @@ -1021,9 +1022,7 @@ def _initalize_fit(self, store_predictions, store_models): self._initialize_models() if self._sensitivity_implemented: - self._sensitivity_elements = self._initialize_sensitivity_elements( - (self._dml_data.n_obs, self.n_rep, self._dml_data.n_coefs) - ) + self._sensitivity_elements = self._initialize_sensitivity_elements(self._score_dim) def _fit_nuisance_and_score_elements(self, n_jobs_cv, store_predictions, external_predictions, store_models): ext_prediction_dict = _set_external_predictions( @@ -1076,30 +1075,26 @@ def _fit_sensitivity_elements(self, nuisance_predictions): def _initialize_arrays(self): # scores - psi = np.full((self._dml_data.n_obs, self.n_rep, self._dml_data.n_coefs), np.nan) - psi_deriv = np.full((self._dml_data.n_obs, self.n_rep, self._dml_data.n_coefs), np.nan) - psi_elements = self._initialize_score_elements((self._dml_data.n_obs, self.n_rep, self._dml_data.n_coefs)) + psi = np.full(self._score_dim, np.nan) + psi_deriv = np.full(self._score_dim, np.nan) + psi_elements = self._initialize_score_elements(self._score_dim) - var_scaling_factors = np.full(self._dml_data.n_treat, np.nan) + n_rep = self._score_dim[1] + n_thetas = self._score_dim[2] + var_scaling_factors = np.full(n_thetas, np.nan) # coefficients and ses - coef = np.full(self._dml_data.n_coefs, np.nan) - se = np.full(self._dml_data.n_coefs, np.nan) + coef = np.full(n_thetas, np.nan) + se = np.full(n_thetas, np.nan) - all_coef = np.full((self._dml_data.n_coefs, self.n_rep), np.nan) - all_se = np.full((self._dml_data.n_coefs, self.n_rep), np.nan) + all_coef = np.full((n_thetas, n_rep), np.nan) + all_se = np.full((n_thetas, n_rep), np.nan) return psi, psi_deriv, psi_elements, var_scaling_factors, coef, se, all_coef, all_se def _initialize_predictions_and_targets(self): - self._predictions = { - learner: np.full((self._dml_data.n_obs, self.n_rep, self._dml_data.n_coefs), np.nan) - for learner in self.params_names - } - self._nuisance_targets = { - learner: np.full((self._dml_data.n_obs, self.n_rep, self._dml_data.n_coefs), np.nan) - for learner in self.params_names - } + self._predictions = {learner: np.full(self._score_dim, np.nan) for learner in self.params_names} + self._nuisance_targets = {learner: np.full(self._score_dim, np.nan) for learner in self.params_names} def _initialize_nuisance_loss(self): self._nuisance_loss = {learner: np.full((self.n_rep, self._dml_data.n_coefs), np.nan) for learner in self.params_names} From ade3b9a451bb0cb20367661773d9a00eb3f9968e Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Tue, 3 Jun 2025 14:25:12 +0200 Subject: [PATCH 06/84] check prediction size based on internal n_obs --- doubleml/double_ml.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index 0ab80cfa..911487a3 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -1005,7 +1005,7 @@ def _check_fit(self, n_jobs_cv, store_predictions, external_predictions, store_m external_predictions=external_predictions, valid_treatments=self._dml_data.d_cols, valid_learners=self.params_names, - 
n_obs=self._dml_data.n_obs, + n_obs=self.n_obs, n_rep=self.n_rep, ) elif not self._external_predictions_implemented and external_predictions is not None: From f113e61e1375f807c039f20b94f777d73e0c6504 Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Tue, 3 Jun 2025 14:26:28 +0200 Subject: [PATCH 07/84] update score dimensions init in the cs object --- doubleml/did/did_cs_binary.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/doubleml/did/did_cs_binary.py b/doubleml/did/did_cs_binary.py index ce57384c..e550eb60 100644 --- a/doubleml/did/did_cs_binary.py +++ b/doubleml/did/did_cs_binary.py @@ -50,6 +50,20 @@ def __init__( ): super().__init__(obj_dml_data, n_folds, n_rep, score, draw_sample_splitting=False) + self._n_obs = obj_dml_data.data.shape[0] + self._score_dim = (self._n_obs, self.n_rep, self._dml_data.n_treat) + # reinitialze arrays + ( + self._psi, + self._psi_deriv, + self._psi_elements, + self._var_scaling_factors, + self._coef, + self._se, + self._all_coef, + self._all_se, + ) = self._initialize_arrays() + self._check_data(self._dml_data) g_values = self._dml_data.g_values t_values = self._dml_data.t_values From d65edf8b861dabaf5c4c1b4468303231b781fcc0 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Wed, 4 Jun 2025 14:35:41 +0200 Subject: [PATCH 08/84] Refactor Data Generators #306 --- doubleml/datasets.py | 1620 ----------------- doubleml/datasets/__init__.py | 13 + doubleml/datasets/fetch_401K.py | 65 + doubleml/datasets/fetch_bonus.py | 98 + doubleml/irm/datasets/__init__.py | 20 + .../irm/datasets/dgp_confounded_irm_data.py | 232 +++ .../irm/datasets/dgp_heterogeneous_data.py | 114 ++ doubleml/irm/datasets/dgp_iivm_data.py | 102 ++ doubleml/irm/datasets/dgp_irm_data.py | 103 ++ .../dgp_irm_data_discrete_treatments.py | 164 ++ doubleml/irm/datasets/dgp_ssm_data.py | 102 ++ doubleml/plm/datasets/__init__.py | 20 + doubleml/plm/datasets/_make_pliv_data.py | 70 + .../plm/datasets/dgp_confounded_plr_data.py | 171 ++ doubleml/plm/datasets/dgp_pliv_CHS2015.py | 108 ++ .../dgp_pliv_multiway_cluster_CKMS2021.py | 199 ++ doubleml/plm/datasets/dgp_plr_CCDDHNR2018.py | 108 ++ doubleml/plm/datasets/dgp_plr_turrell2018.py | 107 ++ 18 files changed, 1796 insertions(+), 1620 deletions(-) delete mode 100644 doubleml/datasets.py create mode 100644 doubleml/datasets/__init__.py create mode 100644 doubleml/datasets/fetch_401K.py create mode 100644 doubleml/datasets/fetch_bonus.py create mode 100644 doubleml/irm/datasets/__init__.py create mode 100644 doubleml/irm/datasets/dgp_confounded_irm_data.py create mode 100644 doubleml/irm/datasets/dgp_heterogeneous_data.py create mode 100644 doubleml/irm/datasets/dgp_iivm_data.py create mode 100644 doubleml/irm/datasets/dgp_irm_data.py create mode 100644 doubleml/irm/datasets/dgp_irm_data_discrete_treatments.py create mode 100644 doubleml/irm/datasets/dgp_ssm_data.py create mode 100644 doubleml/plm/datasets/__init__.py create mode 100644 doubleml/plm/datasets/_make_pliv_data.py create mode 100644 doubleml/plm/datasets/dgp_confounded_plr_data.py create mode 100644 doubleml/plm/datasets/dgp_pliv_CHS2015.py create mode 100644 doubleml/plm/datasets/dgp_pliv_multiway_cluster_CKMS2021.py create mode 100644 doubleml/plm/datasets/dgp_plr_CCDDHNR2018.py create mode 100644 doubleml/plm/datasets/dgp_plr_turrell2018.py diff --git a/doubleml/datasets.py b/doubleml/datasets.py deleted file mode 100644 index 0dcd33c7..00000000 --- a/doubleml/datasets.py +++ /dev/null @@ -1,1620 +0,0 @@ -import warnings - -import numpy as np -import pandas as 
pd -from scipy.linalg import toeplitz -from scipy.optimize import minimize_scalar -from sklearn.datasets import make_spd_matrix -from sklearn.preprocessing import OneHotEncoder, PolynomialFeatures - -from doubleml.data import DoubleMLClusterData, DoubleMLData -from doubleml.utils._aliases import _get_array_alias, _get_data_frame_alias, _get_dml_cluster_data_alias, _get_dml_data_alias - -_array_alias = _get_array_alias() -_data_frame_alias = _get_data_frame_alias() -_dml_data_alias = _get_dml_data_alias() -_dml_cluster_data_alias = _get_dml_cluster_data_alias() - - -def fetch_401K(return_type="DoubleMLData", polynomial_features=False): - """ - Data set on financial wealth and 401(k) plan participation. - - Parameters - ---------- - return_type : - If ``'DoubleMLData'`` or ``DoubleMLData``, returns a ``DoubleMLData`` object. - - If ``'DataFrame'``, ``'pd.DataFrame'`` or ``pd.DataFrame``, returns a ``pd.DataFrame``. - polynomial_features : - If ``True`` polynomial features are added (see replication files of Chernozhukov et al. (2018)). - - References - ---------- - Abadie, A. (2003), Semiparametric instrumental variable estimation of treatment response models. Journal of - Econometrics, 113(2): 231-263. - - Chernozhukov, V., Chetverikov, D., Demirer, M., Duflo, E., Hansen, C., Newey, W. and Robins, J. (2018), - Double/debiased machine learning for treatment and structural parameters. The Econometrics Journal, 21: C1-C68. - doi:`10.1111/ectj.12097 `_. - """ - url = "https://github.com/VC2015/DMLonGitHub/raw/master/sipp1991.dta" - raw_data = pd.read_stata(url) - - y_col = "net_tfa" - d_cols = ["e401"] - x_cols = ["age", "inc", "educ", "fsize", "marr", "twoearn", "db", "pira", "hown"] - - data = raw_data.copy() - - if polynomial_features: - raise NotImplementedError("polynomial_features os not implemented yet for fetch_401K.") - - if return_type in _data_frame_alias + _dml_data_alias: - if return_type in _data_frame_alias: - return data - else: - return DoubleMLData(data, y_col, d_cols, x_cols) - else: - raise ValueError("Invalid return_type.") - - -def fetch_bonus(return_type="DoubleMLData", polynomial_features=False): - """ - Data set on the Pennsylvania Reemployment Bonus experiment. - - Parameters - ---------- - return_type : - If ``'DoubleMLData'`` or ``DoubleMLData``, returns a ``DoubleMLData`` object. - - If ``'DataFrame'``, ``'pd.DataFrame'`` or ``pd.DataFrame``, returns a ``pd.DataFrame``. - polynomial_features : - If ``True`` polynomial features are added (see replication files of Chernozhukov et al. (2018)). - - References - ---------- - Bilias Y. (2000), Sequential Testing of Duration Data: The Case of Pennsylvania 'Reemployment Bonus' Experiment. - Journal of Applied Econometrics, 15(6): 575-594. - - Chernozhukov, V., Chetverikov, D., Demirer, M., Duflo, E., Hansen, C., Newey, W. and Robins, J. (2018), - Double/debiased machine learning for treatment and structural parameters. The Econometrics Journal, 21: C1-C68. - doi:`10.1111/ectj.12097 `_. 
- """ - url = "https://raw.githubusercontent.com/VC2015/DMLonGitHub/master/penn_jae.dat" - raw_data = pd.read_csv(url, sep=r"\s+") - - ind = (raw_data["tg"] == 0) | (raw_data["tg"] == 4) - data = raw_data.copy()[ind] - data.reset_index(inplace=True) - data["tg"] = data["tg"].replace(4, 1) - data["inuidur1"] = np.log(data["inuidur1"]) - - # variable dep as factor (dummy encoding) - dummy_enc = OneHotEncoder(drop="first", categories="auto").fit(data.loc[:, ["dep"]]) - xx = dummy_enc.transform(data.loc[:, ["dep"]]).toarray() - data["dep1"] = xx[:, 0] - data["dep2"] = xx[:, 1] - - y_col = "inuidur1" - d_cols = ["tg"] - x_cols = [ - "female", - "black", - "othrace", - "dep1", - "dep2", - "q2", - "q3", - "q4", - "q5", - "q6", - "agelt35", - "agegt54", - "durable", - "lusd", - "husd", - ] - - if polynomial_features: - poly = PolynomialFeatures(2, include_bias=False) - data_transf = poly.fit_transform(data[x_cols]) - x_cols = list(poly.get_feature_names_out(x_cols)) - - data_transf = pd.DataFrame(data_transf, columns=x_cols) - data = pd.concat((data[[y_col] + d_cols], data_transf), axis=1, sort=False) - - if return_type in _data_frame_alias + _dml_data_alias: - if return_type in _data_frame_alias: - return data - else: - return DoubleMLData(data, y_col, d_cols, x_cols) - else: - raise ValueError("Invalid return_type.") - - -def _g(x): - return np.power(np.sin(x), 2) - - -def _m(x, nu=0.0, gamma=1.0): - return 0.5 / np.pi * (np.sinh(gamma)) / (np.cosh(gamma) - np.cos(x - nu)) - - -def make_plr_CCDDHNR2018(n_obs=500, dim_x=20, alpha=0.5, return_type="DoubleMLData", **kwargs): - """ - Generates data from a partially linear regression model used in Chernozhukov et al. (2018) for Figure 1. - The data generating process is defined as - - .. math:: - - d_i &= m_0(x_i) + s_1 v_i, & &v_i \\sim \\mathcal{N}(0,1), - - y_i &= \\alpha d_i + g_0(x_i) + s_2 \\zeta_i, & &\\zeta_i \\sim \\mathcal{N}(0,1), - - - with covariates :math:`x_i \\sim \\mathcal{N}(0, \\Sigma)`, where :math:`\\Sigma` is a matrix with entries - :math:`\\Sigma_{kj} = 0.7^{|j-k|}`. - The nuisance functions are given by - - .. math:: - - m_0(x_i) &= a_0 x_{i,1} + a_1 \\frac{\\exp(x_{i,3})}{1+\\exp(x_{i,3})}, - - g_0(x_i) &= b_0 \\frac{\\exp(x_{i,1})}{1+\\exp(x_{i,1})} + b_1 x_{i,3}. - - Parameters - ---------- - n_obs : - The number of observations to simulate. - dim_x : - The number of covariates. - alpha : - The value of the causal parameter. - return_type : - If ``'DoubleMLData'`` or ``DoubleMLData``, returns a ``DoubleMLData`` object. - - If ``'DataFrame'``, ``'pd.DataFrame'`` or ``pd.DataFrame``, returns a ``pd.DataFrame``. - - If ``'array'``, ``'np.ndarray'``, ``'np.array'`` or ``np.ndarray``, returns ``np.ndarray``'s ``(x, y, d)``. - **kwargs - Additional keyword arguments to set non-default values for the parameters - :math:`a_0=1`, :math:`a_1=0.25`, :math:`s_1=1`, :math:`b_0=1`, :math:`b_1=0.25` or :math:`s_2=1`. - - References - ---------- - Chernozhukov, V., Chetverikov, D., Demirer, M., Duflo, E., Hansen, C., Newey, W. and Robins, J. (2018), - Double/debiased machine learning for treatment and structural parameters. The Econometrics Journal, 21: C1-C68. - doi:`10.1111/ectj.12097 `_. 
- """ - a_0 = kwargs.get("a_0", 1.0) - a_1 = kwargs.get("a_1", 0.25) - s_1 = kwargs.get("s_1", 1.0) - - b_0 = kwargs.get("b_0", 1.0) - b_1 = kwargs.get("b_1", 0.25) - s_2 = kwargs.get("s_2", 1.0) - - cov_mat = toeplitz([np.power(0.7, k) for k in range(dim_x)]) - x = np.random.multivariate_normal( - np.zeros(dim_x), - cov_mat, - size=[ - n_obs, - ], - ) - - d = ( - a_0 * x[:, 0] - + a_1 * np.divide(np.exp(x[:, 2]), 1 + np.exp(x[:, 2])) - + s_1 - * np.random.standard_normal( - size=[ - n_obs, - ] - ) - ) - y = ( - alpha * d - + b_0 * np.divide(np.exp(x[:, 0]), 1 + np.exp(x[:, 0])) - + b_1 * x[:, 2] - + s_2 - * np.random.standard_normal( - size=[ - n_obs, - ] - ) - ) - - if return_type in _array_alias: - return x, y, d - elif return_type in _data_frame_alias + _dml_data_alias: - x_cols = [f"X{i + 1}" for i in np.arange(dim_x)] - data = pd.DataFrame(np.column_stack((x, y, d)), columns=x_cols + ["y", "d"]) - if return_type in _data_frame_alias: - return data - else: - return DoubleMLData(data, "y", "d", x_cols) - else: - raise ValueError("Invalid return_type.") - - -def make_plr_turrell2018(n_obs=100, dim_x=20, theta=0.5, return_type="DoubleMLData", **kwargs): - """ - Generates data from a partially linear regression model used in a blog article by Turrell (2018). - The data generating process is defined as - - .. math:: - - d_i &= m_0(x_i' b) + v_i, & &v_i \\sim \\mathcal{N}(0,1), - - y_i &= \\theta d_i + g_0(x_i' b) + u_i, & &u_i \\sim \\mathcal{N}(0,1), - - - with covariates :math:`x_i \\sim \\mathcal{N}(0, \\Sigma)`, where :math:`\\Sigma` is a random symmetric, - positive-definite matrix generated with :py:meth:`sklearn.datasets.make_spd_matrix`. - :math:`b` is a vector with entries :math:`b_j=\\frac{1}{j}` and the nuisance functions are given by - - .. math:: - - m_0(x_i) &= \\frac{1}{2 \\pi} \\frac{\\sinh(\\gamma)}{\\cosh(\\gamma) - \\cos(x_i-\\nu)}, - - g_0(x_i) &= \\sin(x_i)^2. - - Parameters - ---------- - n_obs : - The number of observations to simulate. - dim_x : - The number of covariates. - theta : - The value of the causal parameter. - return_type : - If ``'DoubleMLData'`` or ``DoubleMLData``, returns a ``DoubleMLData`` object. - - If ``'DataFrame'``, ``'pd.DataFrame'`` or ``pd.DataFrame``, returns a ``pd.DataFrame``. - - If ``'array'``, ``'np.ndarray'``, ``'np.array'`` or ``np.ndarray``, returns ``np.ndarray``'s ``(x, y, d)``. - **kwargs - Additional keyword arguments to set non-default values for the parameters - :math:`\\nu=0`, or :math:`\\gamma=1`. - - References - ---------- - Turrell, A. (2018), Econometrics in Python part I - Double machine learning, Markov Wanderer: A blog on economics, - science, coding and data. `https://aeturrell.com/blog/posts/econometrics-in-python-parti-ml/ - `_. 
- """ - nu = kwargs.get("nu", 0.0) - gamma = kwargs.get("gamma", 1.0) - - b = [1 / k for k in range(1, dim_x + 1)] - sigma = make_spd_matrix(dim_x) - - x = np.random.multivariate_normal( - np.zeros(dim_x), - sigma, - size=[ - n_obs, - ], - ) - G = _g(np.dot(x, b)) - M = _m(np.dot(x, b), nu=nu, gamma=gamma) - d = M + np.random.standard_normal( - size=[ - n_obs, - ] - ) - y = ( - np.dot(theta, d) - + G - + np.random.standard_normal( - size=[ - n_obs, - ] - ) - ) - - if return_type in _array_alias: - return x, y, d - elif return_type in _data_frame_alias + _dml_data_alias: - x_cols = [f"X{i + 1}" for i in np.arange(dim_x)] - data = pd.DataFrame(np.column_stack((x, y, d)), columns=x_cols + ["y", "d"]) - if return_type in _data_frame_alias: - return data - else: - return DoubleMLData(data, "y", "d", x_cols) - else: - raise ValueError("Invalid return_type.") - - -def make_irm_data(n_obs=500, dim_x=20, theta=0, R2_d=0.5, R2_y=0.5, return_type="DoubleMLData"): - """ - Generates data from a interactive regression (IRM) model. - The data generating process is defined as - - .. math:: - - d_i &= 1\\left\\lbrace \\frac{\\exp(c_d x_i' \\beta)}{1+\\exp(c_d x_i' \\beta)} > v_i \\right\\rbrace, & &v_i - \\sim \\mathcal{U}(0,1), - - y_i &= \\theta d_i + c_y x_i' \\beta d_i + \\zeta_i, & &\\zeta_i \\sim \\mathcal{N}(0,1), - - with covariates :math:`x_i \\sim \\mathcal{N}(0, \\Sigma)`, where :math:`\\Sigma` is a matrix with entries - :math:`\\Sigma_{kj} = 0.5^{|j-k|}`. - :math:`\\beta` is a `dim_x`-vector with entries :math:`\\beta_j=\\frac{1}{j^2}` and the constants :math:`c_y` and - :math:`c_d` are given by - - .. math:: - - c_y = \\sqrt{\\frac{R_y^2}{(1-R_y^2) \\beta' \\Sigma \\beta}}, \\qquad c_d = - \\sqrt{\\frac{(\\pi^2 /3) R_d^2}{(1-R_d^2) \\beta' \\Sigma \\beta}}. - - The data generating process is inspired by a process used in the simulation experiment (see Appendix P) of Belloni - et al. (2017). - - Parameters - ---------- - n_obs : - The number of observations to simulate. - dim_x : - The number of covariates. - theta : - The value of the causal parameter. - R2_d : - The value of the parameter :math:`R_d^2`. - R2_y : - The value of the parameter :math:`R_y^2`. - return_type : - If ``'DoubleMLData'`` or ``DoubleMLData``, returns a ``DoubleMLData`` object. - - If ``'DataFrame'``, ``'pd.DataFrame'`` or ``pd.DataFrame``, returns a ``pd.DataFrame``. - - If ``'array'``, ``'np.ndarray'``, ``'np.array'`` or ``np.ndarray``, returns ``np.ndarray``'s ``(x, y, d)``. - - References - ---------- - Belloni, A., Chernozhukov, V., Fernández‐Val, I. and Hansen, C. (2017). Program Evaluation and Causal Inference With - High‐Dimensional Data. Econometrica, 85: 233-298. 
- """ - # inspired by https://onlinelibrary.wiley.com/doi/abs/10.3982/ECTA12723, see suplement - v = np.random.uniform( - size=[ - n_obs, - ] - ) - zeta = np.random.standard_normal( - size=[ - n_obs, - ] - ) - - cov_mat = toeplitz([np.power(0.5, k) for k in range(dim_x)]) - x = np.random.multivariate_normal( - np.zeros(dim_x), - cov_mat, - size=[ - n_obs, - ], - ) - - beta = [1 / (k**2) for k in range(1, dim_x + 1)] - b_sigma_b = np.dot(np.dot(cov_mat, beta), beta) - c_y = np.sqrt(R2_y / ((1 - R2_y) * b_sigma_b)) - c_d = np.sqrt(np.pi**2 / 3.0 * R2_d / ((1 - R2_d) * b_sigma_b)) - - xx = np.exp(np.dot(x, np.multiply(beta, c_d))) - d = 1.0 * ((xx / (1 + xx)) > v) - - y = d * theta + d * np.dot(x, np.multiply(beta, c_y)) + zeta - - if return_type in _array_alias: - return x, y, d - elif return_type in _data_frame_alias + _dml_data_alias: - x_cols = [f"X{i + 1}" for i in np.arange(dim_x)] - data = pd.DataFrame(np.column_stack((x, y, d)), columns=x_cols + ["y", "d"]) - if return_type in _data_frame_alias: - return data - else: - return DoubleMLData(data, "y", "d", x_cols) - else: - raise ValueError("Invalid return_type.") - - -def make_iivm_data(n_obs=500, dim_x=20, theta=1.0, alpha_x=0.2, return_type="DoubleMLData"): - """ - Generates data from a interactive IV regression (IIVM) model. - The data generating process is defined as - - .. math:: - - d_i &= 1\\left\\lbrace \\alpha_x Z + v_i > 0 \\right\\rbrace, - - y_i &= \\theta d_i + x_i' \\beta + u_i, - - with :math:`Z \\sim \\text{Bernoulli}(0.5)` and - - .. math:: - - \\left(\\begin{matrix} u_i \\\\ v_i \\end{matrix} \\right) \\sim - \\mathcal{N}\\left(0, \\left(\\begin{matrix} 1 & 0.3 \\\\ 0.3 & 1 \\end{matrix} \\right) \\right). - - The covariates :math:`x_i \\sim \\mathcal{N}(0, \\Sigma)`, where :math:`\\Sigma` is a matrix with entries - :math:`\\Sigma_{kj} = 0.5^{|j-k|}` and :math:`\\beta` is a `dim_x`-vector with entries - :math:`\\beta_j=\\frac{1}{j^2}`. - - The data generating process is inspired by a process used in the simulation experiment of Farbmacher, Gruber and - Klaassen (2020). - - Parameters - ---------- - n_obs : - The number of observations to simulate. - dim_x : - The number of covariates. - theta : - The value of the causal parameter. - alpha_x : - The value of the parameter :math:`\\alpha_x`. - return_type : - If ``'DoubleMLData'`` or ``DoubleMLData``, returns a ``DoubleMLData`` object. - - If ``'DataFrame'``, ``'pd.DataFrame'`` or ``pd.DataFrame``, returns a ``pd.DataFrame``. - - If ``'array'``, ``'np.ndarray'``, ``'np.array'`` or ``np.ndarray``, returns ``np.ndarray``'s ``(x, y, d, z)``. - - References - ---------- - Farbmacher, H., Guber, R. and Klaaßen, S. (2020). Instrument Validity Tests with Causal Forests. MEA Discussion - Paper No. 13-2020. Available at SSRN: http://dx.doi.org/10.2139/ssrn.3619201. 
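A usage sketch for ``make_iivm_data`` (again assuming the pre-split import path); with the array return type the instrument ``z`` is returned alongside ``(x, y, d)``::

    import numpy as np
    from doubleml.datasets import make_iivm_data

    np.random.seed(3141)
    x, y, d, z = make_iivm_data(n_obs=500, dim_x=20, theta=1.0, alpha_x=0.2, return_type="array")
    print(x.shape, d.mean(), z.mean())  # treated share and instrument share
    # as DoubleMLData, the instrument is registered as the column "z"
    dml_data = make_iivm_data(n_obs=500, dim_x=20, theta=1.0, alpha_x=0.2)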
- """ - # inspired by https://papers.ssrn.com/sol3/papers.cfm?abstract_id=3619201 - xx = np.random.multivariate_normal( - np.zeros(2), - np.array([[1.0, 0.3], [0.3, 1.0]]), - size=[ - n_obs, - ], - ) - u = xx[:, 0] - v = xx[:, 1] - - cov_mat = toeplitz([np.power(0.5, k) for k in range(dim_x)]) - x = np.random.multivariate_normal( - np.zeros(dim_x), - cov_mat, - size=[ - n_obs, - ], - ) - - beta = [1 / (k**2) for k in range(1, dim_x + 1)] - - z = np.random.binomial( - p=0.5, - n=1, - size=[ - n_obs, - ], - ) - d = 1.0 * (alpha_x * z + v > 0) - - y = d * theta + np.dot(x, beta) + u - - if return_type in _array_alias: - return x, y, d, z - elif return_type in _data_frame_alias + _dml_data_alias: - x_cols = [f"X{i + 1}" for i in np.arange(dim_x)] - data = pd.DataFrame(np.column_stack((x, y, d, z)), columns=x_cols + ["y", "d", "z"]) - if return_type in _data_frame_alias: - return data - else: - return DoubleMLData(data, "y", "d", x_cols, "z") - else: - raise ValueError("Invalid return_type.") - - -def _make_pliv_data(n_obs=100, dim_x=20, theta=0.5, gamma_z=0.4, return_type="DoubleMLData"): - b = [1 / k for k in range(1, dim_x + 1)] - sigma = make_spd_matrix(dim_x) - - x = np.random.multivariate_normal( - np.zeros(dim_x), - sigma, - size=[ - n_obs, - ], - ) - G = _g(np.dot(x, b)) - # instrument - z = _m(np.dot(x, b)) + np.random.standard_normal( - size=[ - n_obs, - ] - ) - # treatment - M = _m(gamma_z * z + np.dot(x, b)) - d = M + np.random.standard_normal( - size=[ - n_obs, - ] - ) - y = ( - np.dot(theta, d) - + G - + np.random.standard_normal( - size=[ - n_obs, - ] - ) - ) - - if return_type in _array_alias: - return x, y, d, z - elif return_type in _data_frame_alias + _dml_data_alias: - x_cols = [f"X{i + 1}" for i in np.arange(dim_x)] - data = pd.DataFrame(np.column_stack((x, y, d, z)), columns=x_cols + ["y", "d", "z"]) - if return_type in _data_frame_alias: - return data - else: - return DoubleMLData(data, "y", "d", x_cols, "z") - else: - raise ValueError("Invalid return_type.") - - -def make_pliv_CHS2015(n_obs, alpha=1.0, dim_x=200, dim_z=150, return_type="DoubleMLData"): - """ - Generates data from a partially linear IV regression model used in Chernozhukov, Hansen and Spindler (2015). - The data generating process is defined as - - .. math:: - - z_i &= \\Pi x_i + \\zeta_i, - - d_i &= x_i' \\gamma + z_i' \\delta + u_i, - - y_i &= \\alpha d_i + x_i' \\beta + \\varepsilon_i, - - with - - .. math:: - - \\left(\\begin{matrix} \\varepsilon_i \\\\ u_i \\\\ \\zeta_i \\\\ x_i \\end{matrix} \\right) \\sim - \\mathcal{N}\\left(0, \\left(\\begin{matrix} 1 & 0.6 & 0 & 0 \\\\ 0.6 & 1 & 0 & 0 \\\\ - 0 & 0 & 0.25 I_{p_n^z} & 0 \\\\ 0 & 0 & 0 & \\Sigma \\end{matrix} \\right) \\right) - - where :math:`\\Sigma` is a :math:`p_n^x \\times p_n^x` matrix with entries - :math:`\\Sigma_{kj} = 0.5^{|j-k|}` and :math:`I_{p_n^z}` is the :math:`p_n^z \\times p_n^z` identity matrix. - :math:`\\beta = \\gamma` is a :math:`p_n^x`-vector with entries :math:`\\beta_j=\\frac{1}{j^2}`, - :math:`\\delta` is a :math:`p_n^z`-vector with entries :math:`\\delta_j=\\frac{1}{j^2}` - and :math:`\\Pi = (I_{p_n^z}, 0_{p_n^z \\times (p_n^x - p_n^z)})`. - - Parameters - ---------- - n_obs : - The number of observations to simulate. - alpha : - The value of the causal parameter. - dim_x : - The number of covariates. - dim_z : - The number of instruments. - return_type : - If ``'DoubleMLData'`` or ``DoubleMLData``, returns a ``DoubleMLData`` object. 
- - If ``'DataFrame'``, ``'pd.DataFrame'`` or ``pd.DataFrame``, returns a ``pd.DataFrame``. - - If ``'array'``, ``'np.ndarray'``, ``'np.array'`` or ``np.ndarray``, returns ``np.ndarray``'s ``(x, y, d, z)``. - - References - ---------- - Chernozhukov, V., Hansen, C. and Spindler, M. (2015), Post-Selection and Post-Regularization Inference in Linear - Models with Many Controls and Instruments. American Economic Review: Papers and Proceedings, 105 (5): 486-90. - """ - assert dim_x >= dim_z - # see https://assets.aeaweb.org/asset-server/articles-attachments/aer/app/10505/P2015_1022_app.pdf - xx = np.random.multivariate_normal( - np.zeros(2), - np.array([[1.0, 0.6], [0.6, 1.0]]), - size=[ - n_obs, - ], - ) - epsilon = xx[:, 0] - u = xx[:, 1] - - sigma = toeplitz([np.power(0.5, k) for k in range(0, dim_x)]) - x = np.random.multivariate_normal( - np.zeros(dim_x), - sigma, - size=[ - n_obs, - ], - ) - - I_z = np.eye(dim_z) - xi = np.random.multivariate_normal( - np.zeros(dim_z), - 0.25 * I_z, - size=[ - n_obs, - ], - ) - - beta = [1 / (k**2) for k in range(1, dim_x + 1)] - gamma = beta - delta = [1 / (k**2) for k in range(1, dim_z + 1)] - Pi = np.hstack((I_z, np.zeros((dim_z, dim_x - dim_z)))) - - z = np.dot(x, np.transpose(Pi)) + xi - d = np.dot(x, gamma) + np.dot(z, delta) + u - y = alpha * d + np.dot(x, beta) + epsilon - - if return_type in _array_alias: - return x, y, d, z - elif return_type in _data_frame_alias + _dml_data_alias: - x_cols = [f"X{i + 1}" for i in np.arange(dim_x)] - z_cols = [f"Z{i + 1}" for i in np.arange(dim_z)] - data = pd.DataFrame(np.column_stack((x, y, d, z)), columns=x_cols + ["y", "d"] + z_cols) - if return_type in _data_frame_alias: - return data - else: - return DoubleMLData(data, "y", "d", x_cols, z_cols) - else: - raise ValueError("Invalid return_type.") - - -def make_pliv_multiway_cluster_CKMS2021(N=25, M=25, dim_X=100, theta=1.0, return_type="DoubleMLClusterData", **kwargs): - """ - Generates data from a partially linear IV regression model with multiway cluster sample used in Chiang et al. - (2021). The data generating process is defined as - - .. math:: - - Z_{ij} &= X_{ij}' \\xi_0 + V_{ij}, - - D_{ij} &= Z_{ij}' \\pi_{10} + X_{ij}' \\pi_{20} + v_{ij}, - - Y_{ij} &= D_{ij} \\theta + X_{ij}' \\zeta_0 + \\varepsilon_{ij}, - - with - - .. math:: - - X_{ij} &= (1 - \\omega_1^X - \\omega_2^X) \\alpha_{ij}^X - + \\omega_1^X \\alpha_{i}^X + \\omega_2^X \\alpha_{j}^X, - - \\varepsilon_{ij} &= (1 - \\omega_1^\\varepsilon - \\omega_2^\\varepsilon) \\alpha_{ij}^\\varepsilon - + \\omega_1^\\varepsilon \\alpha_{i}^\\varepsilon + \\omega_2^\\varepsilon \\alpha_{j}^\\varepsilon, - - v_{ij} &= (1 - \\omega_1^v - \\omega_2^v) \\alpha_{ij}^v - + \\omega_1^v \\alpha_{i}^v + \\omega_2^v \\alpha_{j}^v, - - V_{ij} &= (1 - \\omega_1^V - \\omega_2^V) \\alpha_{ij}^V - + \\omega_1^V \\alpha_{i}^V + \\omega_2^V \\alpha_{j}^V, - - and :math:`\\alpha_{ij}^X, \\alpha_{i}^X, \\alpha_{j}^X \\sim \\mathcal{N}(0, \\Sigma)` - where :math:`\\Sigma` is a :math:`p_x \\times p_x` matrix with entries - :math:`\\Sigma_{kj} = s_X^{|j-k|}`. - Further - - .. 
math:: - - \\left(\\begin{matrix} \\alpha_{ij}^\\varepsilon \\\\ \\alpha_{ij}^v \\end{matrix}\\right), - \\left(\\begin{matrix} \\alpha_{i}^\\varepsilon \\\\ \\alpha_{i}^v \\end{matrix}\\right), - \\left(\\begin{matrix} \\alpha_{j}^\\varepsilon \\\\ \\alpha_{j}^v \\end{matrix}\\right) - \\sim \\mathcal{N}\\left(0, \\left(\\begin{matrix} 1 & s_{\\varepsilon v} \\\\ - s_{\\varepsilon v} & 1 \\end{matrix} \\right) \\right) - - - and :math:`\\alpha_{ij}^V, \\alpha_{i}^V, \\alpha_{j}^V \\sim \\mathcal{N}(0, 1)`. - - Parameters - ---------- - N : - The number of observations (first dimension). - M : - The number of observations (second dimension). - dim_X : - The number of covariates. - theta : - The value of the causal parameter. - return_type : - If ``'DoubleMLClusterData'`` or ``DoubleMLClusterData``, returns a ``DoubleMLClusterData`` object where - ``DoubleMLClusterData.data`` is a ``pd.DataFrame``. - - If ``'DataFrame'``, ``'pd.DataFrame'`` or ``pd.DataFrame``, returns a ``pd.DataFrame``. - - If ``'array'``, ``'np.ndarray'``, ``'np.array'`` or ``np.ndarray``, returns ``np.ndarray``'s - ``(x, y, d, cluster_vars, z)``. - **kwargs - Additional keyword arguments to set non-default values for the parameters - :math:`\\pi_{10}=1.0`, :math:`\\omega_X = \\omega_{\\varepsilon} = \\omega_V = \\omega_v = (0.25, 0.25)`, - :math:`s_X = s_{\\varepsilon v} = 0.25`, - or the :math:`p_x`-vectors :math:`\\zeta_0 = \\pi_{20} = \\xi_0` with default entries - :math:`(\\zeta_{0})_j = 0.5^j`. - - References - ---------- - Chiang, H. D., Kato K., Ma, Y. and Sasaki, Y. (2021), Multiway Cluster Robust Double/Debiased Machine Learning, - Journal of Business & Economic Statistics, - doi: `10.1080/07350015.2021.1895815 `_, - arXiv:`1909.03489 `_. - """ - # additional parameters specifiable via kwargs - pi_10 = kwargs.get("pi_10", 1.0) - - xx = np.arange(1, dim_X + 1) - zeta_0 = kwargs.get("zeta_0", np.power(0.5, xx)) - pi_20 = kwargs.get("pi_20", np.power(0.5, xx)) - xi_0 = kwargs.get("xi_0", np.power(0.5, xx)) - - omega_X = kwargs.get("omega_X", np.array([0.25, 0.25])) - omega_epsilon = kwargs.get("omega_epsilon", np.array([0.25, 0.25])) - omega_v = kwargs.get("omega_v", np.array([0.25, 0.25])) - omega_V = kwargs.get("omega_V", np.array([0.25, 0.25])) - - s_X = kwargs.get("s_X", 0.25) - s_epsilon_v = kwargs.get("s_epsilon_v", 0.25) - - # use np.tile() and np.repeat() for repeating vectors in different styles, i.e., - # np.tile([v1, v2, v3], 2) [v1, v2, v3, v1, v2, v3] - # np.repeat([v1, v2, v3], 2) [v1, v1, v2, v2, v3, v3] - - alpha_V = np.random.normal(size=(N * M)) - alpha_V_i = np.repeat(np.random.normal(size=N), M) - alpha_V_j = np.tile(np.random.normal(size=M), N) - - cov_mat = np.array([[1, s_epsilon_v], [s_epsilon_v, 1]]) - alpha_eps_v = np.random.multivariate_normal( - np.zeros(2), - cov_mat, - size=[ - N * M, - ], - ) - alpha_eps = alpha_eps_v[:, 0] - alpha_v = alpha_eps_v[:, 1] - - alpha_eps_v_i = np.random.multivariate_normal( - np.zeros(2), - cov_mat, - size=[ - N, - ], - ) - alpha_eps_i = np.repeat(alpha_eps_v_i[:, 0], M) - alpha_v_i = np.repeat(alpha_eps_v_i[:, 1], M) - - alpha_eps_v_j = np.random.multivariate_normal( - np.zeros(2), - cov_mat, - size=[ - M, - ], - ) - alpha_eps_j = np.tile(alpha_eps_v_j[:, 0], N) - alpha_v_j = np.tile(alpha_eps_v_j[:, 1], N) - - cov_mat = toeplitz([np.power(s_X, k) for k in range(dim_X)]) - alpha_X = np.random.multivariate_normal( - np.zeros(dim_X), - cov_mat, - size=[ - N * M, - ], - ) - alpha_X_i = np.repeat( - np.random.multivariate_normal( - np.zeros(dim_X), - 
cov_mat, - size=[ - N, - ], - ), - M, - axis=0, - ) - alpha_X_j = np.tile( - np.random.multivariate_normal( - np.zeros(dim_X), - cov_mat, - size=[ - M, - ], - ), - (N, 1), - ) - - # generate variables - x = (1 - omega_X[0] - omega_X[1]) * alpha_X + omega_X[0] * alpha_X_i + omega_X[1] * alpha_X_j - - eps = ( - (1 - omega_epsilon[0] - omega_epsilon[1]) * alpha_eps + omega_epsilon[0] * alpha_eps_i + omega_epsilon[1] * alpha_eps_j - ) - - v = (1 - omega_v[0] - omega_v[1]) * alpha_v + omega_v[0] * alpha_v_i + omega_v[1] * alpha_v_j - - V = (1 - omega_V[0] - omega_V[1]) * alpha_V + omega_V[0] * alpha_V_i + omega_V[1] * alpha_V_j - - z = np.matmul(x, xi_0) + V - d = z * pi_10 + np.matmul(x, pi_20) + v - y = d * theta + np.matmul(x, zeta_0) + eps - - cluster_cols = ["cluster_var_i", "cluster_var_j"] - cluster_vars = pd.MultiIndex.from_product([range(N), range(M)]).to_frame(name=cluster_cols).reset_index(drop=True) - - if return_type in _array_alias: - return x, y, d, cluster_vars.values, z - elif return_type in _data_frame_alias + _dml_cluster_data_alias: - x_cols = [f"X{i + 1}" for i in np.arange(dim_X)] - data = pd.concat((cluster_vars, pd.DataFrame(np.column_stack((x, y, d, z)), columns=x_cols + ["Y", "D", "Z"])), axis=1) - if return_type in _data_frame_alias: - return data - else: - return DoubleMLClusterData(data, "Y", "D", cluster_cols, x_cols, "Z") - else: - raise ValueError("Invalid return_type.") - - -def make_confounded_irm_data(n_obs=500, theta=0.0, gamma_a=0.127, beta_a=0.58, linear=False, **kwargs): - """ - Generates counfounded data from an interactive regression model. - - The data generating process is defined as follows (inspired by the Monte Carlo simulation used - in Sant'Anna and Zhao (2020)). - - Let :math:`X= (X_1, X_2, X_3, X_4, X_5)^T \\sim \\mathcal{N}(0, \\Sigma)`, where :math:`\\Sigma` corresponds - to the identity matrix. - Further, define :math:`Z_j = (\\tilde{Z_j} - \\mathbb{E}[\\tilde{Z}_j]) / \\sqrt{\\text{Var}(\\tilde{Z}_j)}`, - where - - .. math:: - - \\tilde{Z}_1 &= \\exp(0.5 \\cdot X_1) - - \\tilde{Z}_2 &= 10 + X_2/(1 + \\exp(X_1)) - - \\tilde{Z}_3 &= (0.6 + X_1 \\cdot X_3 / 25)^3 - - \\tilde{Z}_4 &= (20 + X_2 + X_4)^2 - - \\tilde{Z}_5 &= X_5. - - Additionally, generate a confounder :math:`A \\sim \\mathcal{U}[-1, 1]`. - At first, define the propensity score as - - .. math:: - - m(X, A) = P(D=1|X,A) = p(Z) + \\gamma_A \\cdot A - - where - - .. math:: - - p(Z) &= \\frac{\\exp(f_{ps}(Z))}{1 + \\exp(f_{ps}(Z))}, - - f_{ps}(Z) &= 0.75 \\cdot (-Z_1 + 0.1 \\cdot Z_2 -0.25 \\cdot Z_3 - 0.1 \\cdot Z_4). - - and generate the treatment :math:`D = 1\\{m(X, A) \\ge U\\}` with :math:`U \\sim \\mathcal{U}[0, 1]`. - Since :math:`A` is independent of :math:`X`, the short form of the propensity score is given as - - .. math:: - - P(D=1|X) = p(Z). - - Further, generate the outcome of interest :math:`Y` as - - .. math:: - - Y &= \\theta \\cdot D (Z_5 + 1) + g(Z) + \\beta_A \\cdot A + \\varepsilon - - g(Z) &= 2.5 + 0.74 \\cdot Z_1 + 0.25 \\cdot Z_2 + 0.137 \\cdot (Z_3 + Z_4) - - where :math:`\\varepsilon \\sim \\mathcal{N}(0,5)`. - This implies an average treatment effect of :math:`\\theta`. Additionally, the long and short forms of - the conditional expectation take the following forms - - .. math:: - - \\mathbb{E}[Y|D, X, A] &= \\theta \\cdot D (Z_5 + 1) + g(Z) + \\beta_A \\cdot A - - \\mathbb{E}[Y|D, X] &= (\\theta + \\beta_A \\frac{\\mathrm{Cov}(A, D(Z_5 + 1))}{\\mathrm{Var}(D(Z_5 + 1))}) - \\cdot D (Z_5 + 1) + g(Z). 
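The short-form slope above can be checked empirically from the generator's oracle values. A sketch, assuming ``make_confounded_irm_data`` is importable from ``doubleml.datasets`` (pre-split layout)::

    import numpy as np
    from doubleml.datasets import make_confounded_irm_data

    np.random.seed(42)
    res = make_confounded_irm_data(n_obs=100_000, theta=0.0, gamma_a=0.127, beta_a=0.58)
    a = res["oracle_values"]["a"]
    z5 = res["oracle_values"]["z"][:, 4]
    d_z5 = res["d"] * (z5 + 1)
    # implied slope of the short model: theta + beta_a * Cov(A, D(Z_5 + 1)) / Var(D(Z_5 + 1))
    theta_short = 0.0 + 0.58 * np.cov(a, d_z5)[0, 1] / np.var(d_z5)
    print(theta_short)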
- - Consequently, the strength of confounding is determined via :math:`\\gamma_A` and :math:`\\beta_A`, which can be - set via the parameters ``gamma_a`` and ``beta_a``. - - The observed data is given as :math:`W = (Y, D, Z)`. - Further, orcale values of the confounder :math:`A`, the transformed covariated :math:`Z`, - the potential outcomes of :math:`Y`, the long and short forms of the main regression and the propensity score and - in sample versions of the confounding parameters :math:`cf_d` and :math:`cf_y` (for ATE and ATTE) - are returned in a dictionary. - - Parameters - ---------- - n_obs : int - The number of observations to simulate. - Default is ``500``. - theta : float or int - Average treatment effect. - Default is ``0.0``. - gamma_a : float - Coefficient of the unobserved confounder in the propensity score. - Default is ``0.127``. - beta_a : float - Coefficient of the unobserved confounder in the outcome regression. - Default is ``0.58``. - linear : bool - If ``True``, the Z will be set to X, such that the underlying (short) models are linear/logistic. - Default is ``False``. - - Returns - ------- - res_dict : dictionary - Dictionary with entries ``x``, ``y``, ``d`` and ``oracle_values``. - - References - ---------- - Sant’Anna, P. H. and Zhao, J. (2020), - Doubly robust difference-in-differences estimators. Journal of Econometrics, 219(1), 101-122. - doi:`10.1016/j.jeconom.2020.06.003 `_. - """ - c = 0.0 # the confounding strength is only valid for c=0 - xi = 0.75 - dim_x = kwargs.get("dim_x", 5) - trimming_threshold = kwargs.get("trimming_threshold", 0.01) - var_eps_y = kwargs.get("var_eps_y", 1.0) - - # Specification of main regression function - def f_reg(w): - res = 2.5 + 0.74 * w[:, 0] + 0.25 * w[:, 1] + 0.137 * (w[:, 2] + w[:, 3]) - return res - - # Specification of prop score function - def f_ps(w, xi): - res = xi * (-w[:, 0] + 0.1 * w[:, 1] - 0.25 * w[:, 2] - 0.1 * w[:, 3]) - return res - - # observed covariates - cov_mat = toeplitz([np.power(c, k) for k in range(dim_x)]) - x = np.random.multivariate_normal( - np.zeros(dim_x), - cov_mat, - size=[ - n_obs, - ], - ) - z_tilde_1 = np.exp(0.5 * x[:, 0]) - z_tilde_2 = 10 + x[:, 1] / (1 + np.exp(x[:, 0])) - z_tilde_3 = (0.6 + x[:, 0] * x[:, 2] / 25) ** 3 - z_tilde_4 = (20 + x[:, 1] + x[:, 3]) ** 2 - z_tilde_5 = x[:, 4] - z_tilde = np.column_stack((z_tilde_1, z_tilde_2, z_tilde_3, z_tilde_4, z_tilde_5)) - z = (z_tilde - np.mean(z_tilde, axis=0)) / np.std(z_tilde, axis=0) - # error terms and unobserved confounder - eps_y = np.random.normal(loc=0, scale=np.sqrt(var_eps_y), size=n_obs) - # unobserved confounder - a_bounds = (-1, 1) - a = np.random.uniform(low=a_bounds[0], high=a_bounds[1], size=n_obs) - var_a = np.square(a_bounds[1] - a_bounds[0]) / 12 - - # Choose the features used in the models - if linear: - features_ps = x - features_reg = x - else: - features_ps = z - features_reg = z - - p = np.exp(f_ps(features_ps, xi)) / (1 + np.exp(f_ps(features_ps, xi))) - # compute short and long form of propensity score - m_long = p + gamma_a * a - m_short = p - # check propensity score bounds - if np.any(m_long < trimming_threshold) or np.any(m_long > 1.0 - trimming_threshold): - m_long = np.clip(m_long, trimming_threshold, 1.0 - trimming_threshold) - m_short = np.clip(m_short, trimming_threshold, 1.0 - trimming_threshold) - warnings.warn( - f"Propensity score is close to 0 or 1. 
" - f"Trimming is at {trimming_threshold} and {1.0 - trimming_threshold} is applied" - ) - # generate treatment based on long form - u = np.random.uniform(low=0, high=1, size=n_obs) - d = 1.0 * (m_long >= u) - # add treatment heterogeneity - d1x = z[:, 4] + 1 - var_dx = np.var(d * (d1x)) - cov_adx = gamma_a * var_a - # Outcome regression - g_partial_reg = f_reg(features_reg) - # short model - g_short_d0 = g_partial_reg - g_short_d1 = (theta + beta_a * cov_adx / var_dx) * d1x + g_partial_reg - g_short = d * g_short_d1 + (1.0 - d) * g_short_d0 - # long model - g_long_d0 = g_partial_reg + beta_a * a - g_long_d1 = theta * d1x + g_partial_reg + beta_a * a - g_long = d * g_long_d1 + (1.0 - d) * g_long_d0 - # Potential outcomes - y_0 = g_long_d0 + eps_y - y_1 = g_long_d1 + eps_y - # Realized outcome - y = d * y_1 + (1.0 - d) * y_0 - # In-sample values for confounding strength - explained_residual_variance = np.square(g_long - g_short) - residual_variance = np.square(y - g_short) - cf_y = np.mean(explained_residual_variance) / np.mean(residual_variance) - # compute the Riesz representation - treated_weight = d / np.mean(d) - untreated_weight = (1.0 - d) / np.mean(d) - # Odds ratios - propensity_ratio_long = m_long / (1.0 - m_long) - rr_long_ate = d / m_long - (1.0 - d) / (1.0 - m_long) - rr_long_atte = treated_weight - np.multiply(untreated_weight, propensity_ratio_long) - propensity_ratio_short = m_short / (1.0 - m_short) - rr_short_ate = d / m_short - (1.0 - d) / (1.0 - m_short) - rr_short_atte = treated_weight - np.multiply(untreated_weight, propensity_ratio_short) - cf_d_ate = (np.mean(1 / (m_long * (1 - m_long))) - np.mean(1 / (m_short * (1 - m_short)))) / np.mean( - 1 / (m_long * (1 - m_long)) - ) - cf_d_atte = (np.mean(propensity_ratio_long) - np.mean(propensity_ratio_short)) / np.mean(propensity_ratio_long) - if (beta_a == 0) | (gamma_a == 0): - rho_ate = 0.0 - rho_atte = 0.0 - else: - rho_ate = np.corrcoef((g_long - g_short), (rr_long_ate - rr_short_ate))[0, 1] - rho_atte = np.corrcoef((g_long - g_short), (rr_long_atte - rr_short_atte))[0, 1] - oracle_values = { - "g_long": g_long, - "g_short": g_short, - "m_long": m_long, - "m_short": m_short, - "gamma_a": gamma_a, - "beta_a": beta_a, - "a": a, - "y_0": y_0, - "y_1": y_1, - "z": z, - "cf_y": cf_y, - "cf_d_ate": cf_d_ate, - "cf_d_atte": cf_d_atte, - "rho_ate": rho_ate, - "rho_atte": rho_atte, - } - res_dict = {"x": x, "y": y, "d": d, "oracle_values": oracle_values} - return res_dict - - -def make_confounded_plr_data(n_obs=500, theta=5.0, cf_y=0.04, cf_d=0.04, **kwargs): - """ - Generates counfounded data from an partially linear regression model. - - The data generating process is defined as follows (similar to the Monte Carlo simulation used - in Sant'Anna and Zhao (2020)). Let :math:`X= (X_1, X_2, X_3, X_4, X_5)^T \\sim \\mathcal{N}(0, \\Sigma)`, - where :math:`\\Sigma` is a matrix with entries - :math:`\\Sigma_{kj} = c^{|j-k|}`. The default value is :math:`c = 0`, corresponding to the identity matrix. - Further, define :math:`Z_j = (\\tilde{Z_j} - \\mathbb{E}[\\tilde{Z}_j]) / \\sqrt{\\text{Var}(\\tilde{Z}_j)}`, - where - - .. math:: - - \\tilde{Z}_1 &= \\exp(0.5 \\cdot X_1) - - \\tilde{Z}_2 &= 10 + X_2/(1 + \\exp(X_1)) - - \\tilde{Z}_3 &= (0.6 + X_1 \\cdot X_3 / 25)^3 - - \\tilde{Z}_4 &= (20 + X_2 + X_4)^2. - - Additionally, generate a confounder :math:`A \\sim \\mathcal{U}[-1, 1]`. - At first, define the treatment as - - .. 
math:: - - D = -Z_1 + 0.5 \\cdot Z_2 - 0.25 \\cdot Z_3 - 0.1 \\cdot Z_4 + \\gamma_A \\cdot A + \\varepsilon_D - - and with :math:`\\varepsilon \\sim \\mathcal{N}(0,1)`. - Since :math:`A` is independent of :math:`X`, the long and short form of the treatment regression are given as - - .. math:: - - E[D|X,A] = -Z_1 + 0.5 \\cdot Z_2 - 0.25 \\cdot Z_3 - 0.1 \\cdot Z_4 + \\gamma_A \\cdot A - - E[D|X] = -Z_1 + 0.5 \\cdot Z_2 - 0.25 \\cdot Z_3 - 0.1 \\cdot Z_4. - - Further, generate the outcome of interest :math:`Y` as - - .. math:: - - Y &= \\theta \\cdot D + g(Z) + \\beta_A \\cdot A + \\varepsilon - - g(Z) &= 210 + 27.4 \\cdot Z_1 +13.7 \\cdot (Z_2 + Z_3 + Z_4) - - where :math:`\\varepsilon \\sim \\mathcal{N}(0,5)`. - This implies an average treatment effect of :math:`\\theta`. Additionally, the long and short forms of - the conditional expectation take the following forms - - .. math:: - - \\mathbb{E}[Y|D, X, A] &= \\theta \\cdot D + g(Z) + \\beta_A \\cdot A - - \\mathbb{E}[Y|D, X] &= (\\theta + \\gamma_A\\beta_A \\frac{\\mathrm{Var}(A)}{\\mathrm{Var}(D)}) \\cdot D + g(Z). - - Consequently, the strength of confounding is determined via :math:`\\gamma_A` and :math:`\\beta_A`. - Both are chosen to obtain the desired confounding of the outcome and Riesz Representer (in sample). - - The observed data is given as :math:`W = (Y, D, X)`. - Further, orcale values of the confounder :math:`A`, the transformed covariated :math:`Z`, the effect :math:`\\theta`, - the coefficients :math:`\\gamma_a`, :math:`\\beta_a`, the long and short forms of the main regression and - the propensity score are returned in a dictionary. - - Parameters - ---------- - n_obs : int - The number of observations to simulate. - Default is ``500``. - theta : float or int - Average treatment effect. - Default is ``5.0``. - cf_y : float - Percentage of the residual variation of the outcome explained by latent/confounding variable. - Default is ``0.04``. - cf_d : float - Percentage gains in the variation of the Riesz Representer generated by latent/confounding variable. - Default is ``0.04``. - - Returns - ------- - res_dict : dictionary - Dictionary with entries ``x``, ``y``, ``d`` and ``oracle_values``. - - References - ---------- - Sant’Anna, P. H. and Zhao, J. (2020), - Doubly robust difference-in-differences estimators. Journal of Econometrics, 219(1), 101-122. - doi:`10.1016/j.jeconom.2020.06.003 `_. 
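A usage sketch for ``make_confounded_plr_data`` (assuming the pre-split import path); the requested strengths ``cf_y`` and ``cf_d`` are calibrated internally and the resulting coefficients are reported in ``oracle_values``::

    import numpy as np
    from doubleml.datasets import make_confounded_plr_data

    np.random.seed(42)
    res = make_confounded_plr_data(n_obs=10_000, theta=5.0, cf_y=0.04, cf_d=0.04)
    x, y, d = res["x"], res["y"], res["d"]
    oracle = res["oracle_values"]
    # calibrated confounder coefficients gamma_a and beta_a
    print(oracle["gamma_a"], oracle["beta_a"])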
- """ - c = kwargs.get("c", 0.0) - dim_x = kwargs.get("dim_x", 4) - - # observed covariates - cov_mat = toeplitz([np.power(c, k) for k in range(dim_x)]) - x = np.random.multivariate_normal( - np.zeros(dim_x), - cov_mat, - size=[ - n_obs, - ], - ) - - z_tilde_1 = np.exp(0.5 * x[:, 0]) - z_tilde_2 = 10 + x[:, 1] / (1 + np.exp(x[:, 0])) - z_tilde_3 = (0.6 + x[:, 0] * x[:, 2] / 25) ** 3 - z_tilde_4 = (20 + x[:, 1] + x[:, 3]) ** 2 - - z_tilde = np.column_stack((z_tilde_1, z_tilde_2, z_tilde_3, z_tilde_4, x[:, 4:])) - z = (z_tilde - np.mean(z_tilde, axis=0)) / np.std(z_tilde, axis=0) - - # error terms - var_eps_y = 5 - eps_y = np.random.normal(loc=0, scale=np.sqrt(var_eps_y), size=n_obs) - var_eps_d = 1 - eps_d = np.random.normal(loc=0, scale=np.sqrt(var_eps_d), size=n_obs) - - # unobserved confounder - a_bounds = (-1, 1) - a = np.random.uniform(low=a_bounds[0], high=a_bounds[1], size=n_obs) - var_a = np.square(a_bounds[1] - a_bounds[0]) / 12 - - # get the required impact of the confounder on the propensity score - m_short = -z[:, 0] + 0.5 * z[:, 1] - 0.25 * z[:, 2] - 0.1 * z[:, 3] - - def f_m(gamma_a): - rr_long = eps_d / var_eps_d - rr_short = (gamma_a * a + eps_d) / (gamma_a**2 * var_a + var_eps_d) - C2_D = (np.mean(np.square(rr_long)) - np.mean(np.square(rr_short))) / np.mean(np.square(rr_short)) - return np.square(C2_D / (1 + C2_D) - cf_d) - - gamma_a = minimize_scalar(f_m).x - m_long = m_short + gamma_a * a - d = m_long + eps_d - - # short and long version of g - g_partial_reg = 210 + 27.4 * z[:, 0] + 13.7 * (z[:, 1] + z[:, 2] + z[:, 3]) - - var_d = np.var(d) - - def f_g(beta_a): - g_diff = beta_a * (a - gamma_a * (var_a / var_d) * d) - y_diff = eps_y + g_diff - return np.square(np.mean(np.square(g_diff)) / np.mean(np.square(y_diff)) - cf_y) - - beta_a = minimize_scalar(f_g).x - - g_long = theta * d + g_partial_reg + beta_a * a - g_short = (theta + gamma_a * beta_a * var_a / var_d) * d + g_partial_reg - - y = g_long + eps_y - - oracle_values = { - "g_long": g_long, - "g_short": g_short, - "m_long": m_long, - "m_short": m_short, - "theta": theta, - "gamma_a": gamma_a, - "beta_a": beta_a, - "a": a, - "z": z, - } - - res_dict = {"x": x, "y": y, "d": d, "oracle_values": oracle_values} - - return res_dict - - -def make_heterogeneous_data(n_obs=200, p=30, support_size=5, n_x=1, binary_treatment=False): - """ - Creates a simple synthetic example for heterogeneous treatment effects. - The data generating process is based on the Monte Carlo simulation from Oprescu et al. (2019). - - The data is generated as - - .. math:: - - Y_i & = \\theta_0(X_i)D_i + \\langle X_i,\\gamma_0\\rangle + \\epsilon_i - - D_i & = \\langle X_i,\\beta_0\\rangle + \\eta_i, - - where :math:`X_i\\sim\\mathcal{U}[0,1]^{p}` and :math:`\\epsilon_i,\\eta_i - \\sim\\mathcal{U}[-1,1]`. - If the treatment is set to be binary, the treatment is generated as - - .. math:: - D_i = 1\\{\\langle X_i,\\beta_0\\rangle \\ge \\eta_i\\}. - - The coefficient vectors :math:`\\gamma_0` and :math:`\\beta_0` both have small random (identical) support - which values are drawn independently from :math:`\\mathcal{U}[0,1]` and :math:`\\mathcal{U}[0,0.3]`. - Further, :math:`\\theta_0(x)` defines the conditional treatment effect, which is defined differently depending - on the dimension of :math:`x`. - - If the heterogeneity is univariate the conditional treatment effect takes the following form - - .. math:: - \\theta_0(x) = \\exp(2x_0) + 3\\sin(4x_0), - - whereas for the two-dimensional case the conditional treatment effect is defined as - - .. 
math:: - \\theta_0(x) = \\exp(2x_0) + 3\\sin(4x_1). - - Parameters - ---------- - n_obs : int - Number of observations to simulate. - Default is ``200``. - - p : int - Dimension of covariates. - Default is ``30``. - - support_size : int - Number of relevant (confounding) covariates. - Default is ``5``. - - n_x : int - Dimension of the heterogeneity. Can be either ``1`` or ``2``. - Default is ``1``. - - binary_treatment : bool - Indicates whether the treatment is binary. - Default is ``False``. - - Returns - ------- - res_dict : dictionary - Dictionary with entries ``data``, ``effects``, ``treatment_effect``. - - """ - # simple input checks - assert n_x in [1, 2], "n_x must be either 1 or 2." - assert support_size <= p, "support_size must be smaller than p." - assert isinstance(binary_treatment, bool), "binary_treatment must be a boolean." - - # define treatment effects - if n_x == 1: - - def treatment_effect(x): - return np.exp(2 * x[:, 0]) + 3 * np.sin(4 * x[:, 0]) - - else: - assert n_x == 2 - - # redefine treatment effect - def treatment_effect(x): - return np.exp(2 * x[:, 0]) + 3 * np.sin(4 * x[:, 1]) - - # Outcome support and coefficients - support_y = np.random.choice(np.arange(p), size=support_size, replace=False) - coefs_y = np.random.uniform(0, 1, size=support_size) - # treatment support and coefficients - support_d = support_y - coefs_d = np.random.uniform(0, 0.3, size=support_size) - - # noise - epsilon = np.random.uniform(-1, 1, size=n_obs) - eta = np.random.uniform(-1, 1, size=n_obs) - - # Generate controls, covariates, treatments and outcomes - x = np.random.uniform(0, 1, size=(n_obs, p)) - # Heterogeneous treatment effects - te = treatment_effect(x) - if binary_treatment: - d = 1.0 * (np.dot(x[:, support_d], coefs_d) >= eta) - else: - d = np.dot(x[:, support_d], coefs_d) + eta - y = te * d + np.dot(x[:, support_y], coefs_y) + epsilon - - # Now we build the dataset - y_df = pd.DataFrame({"y": y}) - d_df = pd.DataFrame({"d": d}) - x_df = pd.DataFrame(data=x, index=np.arange(x.shape[0]), columns=[f"X_{i}" for i in range(x.shape[1])]) - - data = pd.concat([y_df, d_df, x_df], axis=1) - res_dict = {"data": data, "effects": te, "treatment_effect": treatment_effect} - return res_dict - - -def make_ssm_data(n_obs=8000, dim_x=100, theta=1, mar=True, return_type="DoubleMLData"): - """ - Generates data from a sample selection model (SSM). - The data generating process is defined as - - .. math:: - - y_i &= \\theta d_i + x_i' \\beta d_i + u_i, - - s_i &= 1\\left\\lbrace d_i + \\gamma z_i + x_i' \\beta + v_i > 0 \\right\\rbrace, - - d_i &= 1\\left\\lbrace x_i' \\beta + w_i > 0 \\right\\rbrace, - - with Y being observed if :math:`s_i = 1` and covariates :math:`x_i \\sim \\mathcal{N}(0, \\Sigma^2_x)`, where - :math:`\\Sigma^2_x` is a matrix with entries - :math:`\\Sigma_{kj} = 0.5^{|j-k|}`. - :math:`\\beta` is a `dim_x`-vector with entries :math:`\\beta_j=\\frac{0.4}{j^2}` - :math:`z_i \\sim \\mathcal{N}(0, 1)`, - :math:`(u_i,v_i) \\sim \\mathcal{N}(0, \\Sigma^2_{u,v})`, - :math:`w_i \\sim \\mathcal{N}(0, 1)`. - - - The data generating process is inspired by a process used in the simulation study (see Appendix E) of Bia, - Huber and Lafférs (2023). - - Parameters - ---------- - n_obs : - The number of observations to simulate. - dim_x : - The number of covariates. - theta : - The value of the causal parameter. - mar: - Boolean. Indicates whether missingness at random holds. - return_type : - If ``'DoubleMLData'`` or ``DoubleMLData``, returns a ``DoubleMLData`` object. 
- - If ``'DataFrame'``, ``'pd.DataFrame'`` or ``pd.DataFrame``, returns a ``pd.DataFrame``. - - If ``'array'``, ``'np.ndarray'``, ``'np.array'`` or ``np.ndarray``, returns ``np.ndarray``'s ``(x, y, d, z, s)``. - - References - ---------- - Michela Bia, Martin Huber & Lukáš Lafférs (2023) Double Machine Learning for Sample Selection Models, - Journal of Business & Economic Statistics, DOI: 10.1080/07350015.2023.2271071 - """ - if mar: - sigma = np.array([[1, 0], [0, 1]]) - gamma = 0 - else: - sigma = np.array([[1, 0.8], [0.8, 1]]) - gamma = 1 - - e = np.random.multivariate_normal(mean=[0, 0], cov=sigma, size=n_obs).T - - cov_mat = toeplitz([np.power(0.5, k) for k in range(dim_x)]) - x = np.random.multivariate_normal( - np.zeros(dim_x), - cov_mat, - size=[ - n_obs, - ], - ) - - beta = [0.4 / (k**2) for k in range(1, dim_x + 1)] - - d = np.where(np.dot(x, beta) + np.random.randn(n_obs) > 0, 1, 0) - z = np.random.randn(n_obs) - s = np.where(np.dot(x, beta) + d + gamma * z + e[0] > 0, 1, 0) - - y = np.dot(x, beta) + theta * d + e[1] - y[s == 0] = 0 - - if return_type in _array_alias: - return x, y, d, z, s - elif return_type in _data_frame_alias + _dml_data_alias: - x_cols = [f"X{i + 1}" for i in np.arange(dim_x)] - if mar: - data = pd.DataFrame(np.column_stack((x, y, d, s)), columns=x_cols + ["y", "d", "s"]) - else: - data = pd.DataFrame(np.column_stack((x, y, d, z, s)), columns=x_cols + ["y", "d", "z", "s"]) - if return_type in _data_frame_alias: - return data - else: - if mar: - return DoubleMLData(data, "y", "d", x_cols, None, None, "s") - return DoubleMLData(data, "y", "d", x_cols, "z", None, "s") - else: - raise ValueError("Invalid return_type.") - - -def make_irm_data_discrete_treatments(n_obs=200, n_levels=3, linear=False, random_state=None, **kwargs): - """ - Generates data from a interactive regression (IRM) model with multiple treatment levels (based on an - underlying continous treatment). - - The data generating process is defined as follows (similar to the Monte Carlo simulation used - in Sant'Anna and Zhao (2020)). - - Let :math:`X= (X_1, X_2, X_3, X_4, X_5)^T \\sim \\mathcal{N}(0, \\Sigma)`, where :math:`\\Sigma` corresponds - to the identity matrix. - Further, define :math:`Z_j = (\\tilde{Z_j} - \\mathbb{E}[\\tilde{Z}_j]) / \\sqrt{\\text{Var}(\\tilde{Z}_j)}`, - where - - .. math:: - - \\tilde{Z}_1 &= \\exp(0.5 \\cdot X_1) - - \\tilde{Z}_2 &= 10 + X_2/(1 + \\exp(X_1)) - - \\tilde{Z}_3 &= (0.6 + X_1 \\cdot X_3 / 25)^3 - - \\tilde{Z}_4 &= (20 + X_2 + X_4)^2 - - \\tilde{Z}_5 &= X_5. - - A continuous treatment :math:`D_{\\text{cont}}` is generated as - - .. math:: - - D_{\\text{cont}} = \\xi (-Z_1 + 0.5 Z_2 - 0.25 Z_3 - 0.1 Z_4) + \\varepsilon_D, - - where :math:`\\varepsilon_D \\sim \\mathcal{N}(0,1)` and :math:`\\xi=0.3`. The corresponding treatment - effect is defined as - - .. math:: - - \\theta (d) = 0.1 \\exp(d) + 10 \\sin(0.7 d) + 2 d - 0.2 d^2. - - Based on the continous treatment, a discrete treatment :math:`D` is generated as with a baseline level of - :math:`D=0` and additional levels based on the quantiles of :math:`D_{\\text{cont}}`. The number of levels - is defined by :math:`n_{\\text{levels}}`. Each level is chosen to have the same probability of being selected. - - The potential outcomes are defined as - - .. math:: - - Y(0) &= 210 + 27.4 Z_1 + 13.7 (Z_2 + Z_3 + Z_4) + \\varepsilon_Y - - Y(1) &= \\theta (D_{\\text{cont}}) 1\\{D_{\\text{cont}} > 0\\} + Y(0), - - where :math:`\\varepsilon_Y \\sim \\mathcal{N}(0,5)`. Further, the observed outcome is defined as - - .. 
math:: - - Y = Y(1) 1\\{D > 0\\} + Y(0) 1\\{D = 0\\}. - - The data is returned as a dictionary with the entries ``x``, ``y``, ``d`` and ``oracle_values``. - - Parameters - ---------- - n_obs : int - The number of observations to simulate. - Default is ``200``. - - n_levels : int - The number of treatment levels. - Default is ``3``. - - linear : bool - Indicates whether the true underlying regression is linear. - Default is ``False``. - - random_state : int - Random seed for reproducibility. - Default is ``42``. - - Returns - ------- - res_dict : dictionary - Dictionary with entries ``x``, ``y``, ``d`` and ``oracle_values``. - The oracle values contain the continuous treatment, the level bounds, the potential level, ITE - and the potential outcome without treatment. - - """ - if random_state is not None: - np.random.seed(random_state) - xi = kwargs.get("xi", 0.3) - c = kwargs.get("c", 0.0) - dim_x = kwargs.get("dim_x", 5) - - if not isinstance(n_levels, int): - raise ValueError("n_levels must be an integer.") - if n_levels < 2: - raise ValueError("n_levels must be at least 2.") - - # observed covariates - cov_mat = toeplitz([np.power(c, k) for k in range(dim_x)]) - x = np.random.multivariate_normal( - np.zeros(dim_x), - cov_mat, - size=[ - n_obs, - ], - ) - - def f_reg(w): - res = 210 + 27.4 * w[:, 0] + 13.7 * (w[:, 1] + w[:, 2] + w[:, 3]) - return res - - def f_treatment(w, xi): - res = xi * (-w[:, 0] + 0.5 * w[:, 1] - 0.25 * w[:, 2] - 0.1 * w[:, 3]) - return res - - def treatment_effect(d, scale=15): - return scale * (1 / (1 + np.exp(-d - 1.2 * np.cos(d)))) - 2 - - z_tilde_1 = np.exp(0.5 * x[:, 0]) - z_tilde_2 = 10 + x[:, 1] / (1 + np.exp(x[:, 0])) - z_tilde_3 = (0.6 + x[:, 0] * x[:, 2] / 25) ** 3 - z_tilde_4 = (20 + x[:, 1] + x[:, 3]) ** 2 - - z_tilde = np.column_stack((z_tilde_1, z_tilde_2, z_tilde_3, z_tilde_4, x[:, 4:])) - z = (z_tilde - np.mean(z_tilde, axis=0)) / np.std(z_tilde, axis=0) - - # error terms - var_eps_y = 5 - eps_y = np.random.normal(loc=0, scale=np.sqrt(var_eps_y), size=n_obs) - var_eps_d = 1 - eps_d = np.random.normal(loc=0, scale=np.sqrt(var_eps_d), size=n_obs) - - if linear: - g = f_reg(x) - m = f_treatment(x, xi) - else: - assert not linear - g = f_reg(z) - m = f_treatment(z, xi) - - cont_d = m + eps_d - level_bounds = np.quantile(cont_d, q=np.linspace(0, 1, n_levels + 1)) - potential_level = sum([1.0 * (cont_d >= bound) for bound in level_bounds[1:-1]]) + 1 - eta = np.random.uniform(0, 1, size=n_obs) - d = 1.0 * (eta >= 1 / n_levels) * potential_level - - ite = treatment_effect(cont_d) - y0 = g + eps_y - # only treated for d > 0 compared to the baseline - y = ite * (d > 0) + y0 - - oracle_values = { - "cont_d": cont_d, - "level_bounds": level_bounds, - "potential_level": potential_level, - "ite": ite, - "y0": y0, - } - - resul_dict = {"x": x, "y": y, "d": d, "oracle_values": oracle_values} - - return resul_dict diff --git a/doubleml/datasets/__init__.py b/doubleml/datasets/__init__.py new file mode 100644 index 00000000..6a64a5c8 --- /dev/null +++ b/doubleml/datasets/__init__.py @@ -0,0 +1,13 @@ +""" +The :mod:`doubleml.datasets` module implements data generating processes for double machine learning simulations and provides access to real datasets. 
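A sketch of the intended imports after this reorganization (the real-data fetchers are exposed by ``doubleml.datasets`` via the ``__init__.py`` below; downloading the data requires network access)::

    from doubleml.datasets import fetch_401K, fetch_bonus

    # 401(k) data as a DoubleMLData object (net_tfa as outcome, e401 as treatment)
    dml_401k = fetch_401K(return_type="DoubleMLData")
    # Pennsylvania reemployment bonus data as a plain pandas DataFrame
    df_bonus = fetch_bonus(return_type="DataFrame")
    print(df_bonus.shape)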
+""" + +# Import fetch functions +from .fetch_401K import fetch_401K +from .fetch_bonus import fetch_bonus + + +__all__ = [ + "fetch_401K", + "fetch_bonus", +] diff --git a/doubleml/datasets/fetch_401K.py b/doubleml/datasets/fetch_401K.py new file mode 100644 index 00000000..05a97fe7 --- /dev/null +++ b/doubleml/datasets/fetch_401K.py @@ -0,0 +1,65 @@ +""" +Data set on financial wealth and 401(k) plan participation. +""" + +import pandas as pd +from doubleml import DoubleMLData + + +def _get_array_alias(): + return ["array", "np.array", "np.ndarray"] + + +def _get_data_frame_alias(): + return ["DataFrame", "pd.DataFrame", "pandas.DataFrame"] + + +def _get_dml_data_alias(): + return ["DoubleMLData"] + + +def fetch_401K(return_type="DoubleMLData", polynomial_features=False): + """ + Data set on financial wealth and 401(k) plan participation. + + Parameters + ---------- + return_type : + If ``'DoubleMLData'`` or ``DoubleMLData``, returns a ``DoubleMLData`` object. + + If ``'DataFrame'``, ``'pd.DataFrame'`` or ``pd.DataFrame``, returns a ``pd.DataFrame``. + polynomial_features : + If ``True`` polynomial features are added (see replication files of Chernozhukov et al. (2018)). + + References + ---------- + Abadie, A. (2003), Semiparametric instrumental variable estimation of treatment response models. Journal of + Econometrics, 113(2): 231-263. + + Chernozhukov, V., Chetverikov, D., Demirer, M., Duflo, E., Hansen, C., Newey, W. and Robins, J. (2018), + Double/debiased machine learning for treatment and structural parameters. The Econometrics Journal, 21: C1-C68. + doi:`10.1111/ectj.12097 `_. + """ + _array_alias = _get_array_alias() + _data_frame_alias = _get_data_frame_alias() + _dml_data_alias = _get_dml_data_alias() + + url = "https://github.com/VC2015/DMLonGitHub/raw/master/sipp1991.dta" + raw_data = pd.read_stata(url) + + y_col = "net_tfa" + d_cols = ["e401"] + x_cols = ["age", "inc", "educ", "fsize", "marr", "twoearn", "db", "pira", "hown"] + + data = raw_data.copy() + + if polynomial_features: + raise NotImplementedError("polynomial_features os not implemented yet for fetch_401K.") + + if return_type in _data_frame_alias + _dml_data_alias: + if return_type in _data_frame_alias: + return data + else: + return DoubleMLData(data, y_col, d_cols, x_cols) + else: + raise ValueError("Invalid return_type.") diff --git a/doubleml/datasets/fetch_bonus.py b/doubleml/datasets/fetch_bonus.py new file mode 100644 index 00000000..155100c3 --- /dev/null +++ b/doubleml/datasets/fetch_bonus.py @@ -0,0 +1,98 @@ +""" +Data set on the Pennsylvania Reemployment Bonus experiment. +""" + +import numpy as np +import pandas as pd +from sklearn.preprocessing import OneHotEncoder, PolynomialFeatures +from doubleml import DoubleMLData + + +def _get_array_alias(): + return ["array", "np.array", "np.ndarray"] + + +def _get_data_frame_alias(): + return ["DataFrame", "pd.DataFrame", "pandas.DataFrame"] + + +def _get_dml_data_alias(): + return ["DoubleMLData"] + + +def fetch_bonus(return_type="DoubleMLData", polynomial_features=False): + """ + Data set on the Pennsylvania Reemployment Bonus experiment. + + Parameters + ---------- + return_type : + If ``'DoubleMLData'`` or ``DoubleMLData``, returns a ``DoubleMLData`` object. + + If ``'DataFrame'``, ``'pd.DataFrame'`` or ``pd.DataFrame``, returns a ``pd.DataFrame``. + polynomial_features : + If ``True`` polynomial features are added (see replication files of Chernozhukov et al. (2018)). + + References + ---------- + Bilias Y. 
(2000), Sequential Testing of Duration Data: The Case of Pennsylvania 'Reemployment Bonus' Experiment. + Journal of Applied Econometrics, 15(6): 575-594. + + Chernozhukov, V., Chetverikov, D., Demirer, M., Duflo, E., Hansen, C., Newey, W. and Robins, J. (2018), + Double/debiased machine learning for treatment and structural parameters. The Econometrics Journal, 21: C1-C68. + doi:`10.1111/ectj.12097 `_. + """ + _array_alias = _get_array_alias() + _data_frame_alias = _get_data_frame_alias() + _dml_data_alias = _get_dml_data_alias() + + url = "https://raw.githubusercontent.com/VC2015/DMLonGitHub/master/penn_jae.dat" + raw_data = pd.read_csv(url, sep=r"\s+") + + ind = (raw_data["tg"] == 0) | (raw_data["tg"] == 4) + data = raw_data.copy()[ind] + data.reset_index(inplace=True) + data["tg"] = data["tg"].replace(4, 1) + data["inuidur1"] = np.log(data["inuidur1"]) + + # variable dep as factor (dummy encoding) + dummy_enc = OneHotEncoder(drop="first", categories="auto").fit(data.loc[:, ["dep"]]) + xx = dummy_enc.transform(data.loc[:, ["dep"]]).toarray() + data["dep1"] = xx[:, 0] + data["dep2"] = xx[:, 1] + + y_col = "inuidur1" + d_cols = ["tg"] + x_cols = [ + "female", + "black", + "othrace", + "dep1", + "dep2", + "q2", + "q3", + "q4", + "q5", + "q6", + "agelt35", + "agegt54", + "durable", + "lusd", + "husd", + ] + + if polynomial_features: + poly = PolynomialFeatures(2, include_bias=False) + data_transf = poly.fit_transform(data[x_cols]) + x_cols = list(poly.get_feature_names_out(x_cols)) + + data_transf = pd.DataFrame(data_transf, columns=x_cols) + data = pd.concat((data[[y_col] + d_cols], data_transf), axis=1, sort=False) + + if return_type in _data_frame_alias + _dml_data_alias: + if return_type in _data_frame_alias: + return data + else: + return DoubleMLData(data, y_col, d_cols, x_cols) + else: + raise ValueError("Invalid return_type.") diff --git a/doubleml/irm/datasets/__init__.py b/doubleml/irm/datasets/__init__.py new file mode 100644 index 00000000..05f95134 --- /dev/null +++ b/doubleml/irm/datasets/__init__.py @@ -0,0 +1,20 @@ +""" +The :mod:`doubleml.irm.datasets` module implements data generating processes for interactive regression models. +""" + +from .dgp_confounded_irm_data import make_confounded_irm_data +from .dgp_heterogeneous_data import make_heterogeneous_data +from .dgp_iivm_data import make_iivm_data +from .dgp_irm_data import make_irm_data +from .dgp_irm_data_discrete_treatments import make_irm_data_discrete_treatments +from .dgp_ssm_data import make_ssm_data + + +__all__ = [ + "make_confounded_irm_data", + "make_heterogeneous_data", + "make_iivm_data", + "make_irm_data", + "make_irm_data_discrete_treatments", + "make_ssm_data", +] diff --git a/doubleml/irm/datasets/dgp_confounded_irm_data.py b/doubleml/irm/datasets/dgp_confounded_irm_data.py new file mode 100644 index 00000000..2452e896 --- /dev/null +++ b/doubleml/irm/datasets/dgp_confounded_irm_data.py @@ -0,0 +1,232 @@ +import numpy as np +import warnings +from scipy.linalg import toeplitz + + +def make_confounded_irm_data(n_obs=500, theta=0.0, gamma_a=0.127, beta_a=0.58, linear=False, **kwargs): + """ + Generates counfounded data from an interactive regression model. + + The data generating process is defined as follows (inspired by the Monte Carlo simulation used + in Sant'Anna and Zhao (2020)). + + Let :math:`X= (X_1, X_2, X_3, X_4, X_5)^T \\sim \\mathcal{N}(0, \\Sigma)`, where :math:`\\Sigma` corresponds + to the identity matrix. 
+ Further, define :math:`Z_j = (\\tilde{Z_j} - \\mathbb{E}[\\tilde{Z}_j]) / \\sqrt{\\text{Var}(\\tilde{Z}_j)}`, + where + + .. math:: + + \\tilde{Z}_1 &= \\exp(0.5 \\cdot X_1) + + \\tilde{Z}_2 &= 10 + X_2/(1 + \\exp(X_1)) + + \\tilde{Z}_3 &= (0.6 + X_1 \\cdot X_3 / 25)^3 + + \\tilde{Z}_4 &= (20 + X_2 + X_4)^2 + + \\tilde{Z}_5 &= X_5. + + Additionally, generate a confounder :math:`A \\sim \\mathcal{U}[-1, 1]`. + First, define the propensity score as + + .. math:: + + m(X, A) = P(D=1|X,A) = p(Z) + \\gamma_A \\cdot A + + where + + .. math:: + + p(Z) &= \\frac{\\exp(f_{ps}(Z))}{1 + \\exp(f_{ps}(Z))}, + + f_{ps}(Z) &= 0.75 \\cdot (-Z_1 + 0.1 \\cdot Z_2 -0.25 \\cdot Z_3 - 0.1 \\cdot Z_4). + + and generate the treatment :math:`D = 1\\{m(X, A) \\ge U\\}` with :math:`U \\sim \\mathcal{U}[0, 1]`. + Since :math:`A` is independent of :math:`X`, the short form of the propensity score is given as + + .. math:: + + P(D=1|X) = p(Z). + + Further, generate the outcome of interest :math:`Y` as + + .. math:: + + Y &= \\theta \\cdot D (Z_5 + 1) + g(Z) + \\beta_A \\cdot A + \\varepsilon + + g(Z) &= 2.5 + 0.74 \\cdot Z_1 + 0.25 \\cdot Z_2 + 0.137 \\cdot (Z_3 + Z_4) + + where :math:`\\varepsilon \\sim \\mathcal{N}(0,5)`. + This implies an average treatment effect of :math:`\\theta`. Additionally, the long and short forms of + the conditional expectation take the following forms + + .. math:: + + \\mathbb{E}[Y|D, X, A] &= \\theta \\cdot D (Z_5 + 1) + g(Z) + \\beta_A \\cdot A + + \\mathbb{E}[Y|D, X] &= (\\theta + \\beta_A \\frac{\\mathrm{Cov}(A, D(Z_5 + 1))}{\\mathrm{Var}(D(Z_5 + 1))}) + \\cdot D (Z_5 + 1) + g(Z). + + Consequently, the strength of confounding is determined via :math:`\\gamma_A` and :math:`\\beta_A`, which can be + set via the parameters ``gamma_a`` and ``beta_a``. + + The observed data is given as :math:`W = (Y, D, Z)`. + Further, oracle values of the confounder :math:`A`, the transformed covariates :math:`Z`, + the potential outcomes of :math:`Y`, the long and short forms of the main regression and the propensity score, and + in-sample versions of the confounding parameters :math:`cf_d` and :math:`cf_y` (for ATE and ATTE) + are returned in a dictionary. + + Parameters + ---------- + n_obs : int + The number of observations to simulate. + Default is ``500``. + theta : float or int + Average treatment effect. + Default is ``0.0``. + gamma_a : float + Coefficient of the unobserved confounder in the propensity score. + Default is ``0.127``. + beta_a : float + Coefficient of the unobserved confounder in the outcome regression. + Default is ``0.58``. + linear : bool + If ``True``, ``Z`` will be set to ``X``, such that the underlying (short) models are linear/logistic. + Default is ``False``. + + Returns + ------- + res_dict : dictionary + Dictionary with entries ``x``, ``y``, ``d`` and ``oracle_values``. + + References + ---------- + Sant'Anna, P. H. and Zhao, J. (2020), + Doubly robust difference-in-differences estimators. Journal of Econometrics, 219(1), 101-122. + doi:`10.1016/j.jeconom.2020.06.003 `_.
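A usage sketch for this generator (not part of the patch; it assumes ``make_confounded_irm_data`` is exported from ``doubleml.irm.datasets`` as in the new ``__init__.py`` above)::

    import numpy as np
    from doubleml.irm.datasets import make_confounded_irm_data

    np.random.seed(42)
    res = make_confounded_irm_data(n_obs=5_000, theta=0.0, gamma_a=0.127, beta_a=0.58)
    x, y, d = res["x"], res["y"], res["d"]
    oracle = res["oracle_values"]
    # in-sample confounding strengths and the implied sensitivity parameters
    print(oracle["cf_y"], oracle["cf_d_ate"], oracle["rho_ate"])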
+ """ + c = 0.0 # the confounding strength is only valid for c=0 + xi = 0.75 + dim_x = kwargs.get("dim_x", 5) + trimming_threshold = kwargs.get("trimming_threshold", 0.01) + var_eps_y = kwargs.get("var_eps_y", 1.0) + + # Specification of main regression function + def f_reg(w): + res = 2.5 + 0.74 * w[:, 0] + 0.25 * w[:, 1] + 0.137 * (w[:, 2] + w[:, 3]) + return res + + # Specification of prop score function + def f_ps(w, xi): + res = xi * (-w[:, 0] + 0.1 * w[:, 1] - 0.25 * w[:, 2] - 0.1 * w[:, 3]) + return res + + # observed covariates + cov_mat = toeplitz([np.power(c, k) for k in range(dim_x)]) + x = np.random.multivariate_normal( + np.zeros(dim_x), + cov_mat, + size=[ + n_obs, + ], + ) + z_tilde_1 = np.exp(0.5 * x[:, 0]) + z_tilde_2 = 10 + x[:, 1] / (1 + np.exp(x[:, 0])) + z_tilde_3 = (0.6 + x[:, 0] * x[:, 2] / 25) ** 3 + z_tilde_4 = (20 + x[:, 1] + x[:, 3]) ** 2 + z_tilde_5 = x[:, 4] + z_tilde = np.column_stack((z_tilde_1, z_tilde_2, z_tilde_3, z_tilde_4, z_tilde_5)) + z = (z_tilde - np.mean(z_tilde, axis=0)) / np.std(z_tilde, axis=0) + # error terms and unobserved confounder + eps_y = np.random.normal(loc=0, scale=np.sqrt(var_eps_y), size=n_obs) + # unobserved confounder + a_bounds = (-1, 1) + a = np.random.uniform(low=a_bounds[0], high=a_bounds[1], size=n_obs) + var_a = np.square(a_bounds[1] - a_bounds[0]) / 12 + + # Choose the features used in the models + if linear: + features_ps = x + features_reg = x + else: + features_ps = z + features_reg = z + + p = np.exp(f_ps(features_ps, xi)) / (1 + np.exp(f_ps(features_ps, xi))) + # compute short and long form of propensity score + m_long = p + gamma_a * a + m_short = p + # check propensity score bounds + if np.any(m_long < trimming_threshold) or np.any(m_long > 1.0 - trimming_threshold): + m_long = np.clip(m_long, trimming_threshold, 1.0 - trimming_threshold) + m_short = np.clip(m_short, trimming_threshold, 1.0 - trimming_threshold) + warnings.warn( + f"Propensity score is close to 0 or 1. 
" + f"Trimming is at {trimming_threshold} and {1.0 - trimming_threshold} is applied" + ) + # generate treatment based on long form + u = np.random.uniform(low=0, high=1, size=n_obs) + d = 1.0 * (m_long >= u) + # add treatment heterogeneity + d1x = z[:, 4] + 1 + var_dx = np.var(d * (d1x)) + cov_adx = gamma_a * var_a + # Outcome regression + g_partial_reg = f_reg(features_reg) + # short model + g_short_d0 = g_partial_reg + g_short_d1 = (theta + beta_a * cov_adx / var_dx) * d1x + g_partial_reg + g_short = d * g_short_d1 + (1.0 - d) * g_short_d0 + # long model + g_long_d0 = g_partial_reg + beta_a * a + g_long_d1 = theta * d1x + g_partial_reg + beta_a * a + g_long = d * g_long_d1 + (1.0 - d) * g_long_d0 + # Potential outcomes + y_0 = g_long_d0 + eps_y + y_1 = g_long_d1 + eps_y + # Realized outcome + y = d * y_1 + (1.0 - d) * y_0 + # In-sample values for confounding strength + explained_residual_variance = np.square(g_long - g_short) + residual_variance = np.square(y - g_short) + cf_y = np.mean(explained_residual_variance) / np.mean(residual_variance) + # compute the Riesz representation + treated_weight = d / np.mean(d) + untreated_weight = (1.0 - d) / np.mean(d) + # Odds ratios + propensity_ratio_long = m_long / (1.0 - m_long) + rr_long_ate = d / m_long - (1.0 - d) / (1.0 - m_long) + rr_long_atte = treated_weight - np.multiply(untreated_weight, propensity_ratio_long) + propensity_ratio_short = m_short / (1.0 - m_short) + rr_short_ate = d / m_short - (1.0 - d) / (1.0 - m_short) + rr_short_atte = treated_weight - np.multiply(untreated_weight, propensity_ratio_short) + cf_d_ate = (np.mean(1 / (m_long * (1 - m_long))) - np.mean(1 / (m_short * (1 - m_short)))) / np.mean( + 1 / (m_long * (1 - m_long)) + ) + cf_d_atte = (np.mean(propensity_ratio_long) - np.mean(propensity_ratio_short)) / np.mean(propensity_ratio_long) + if (beta_a == 0) | (gamma_a == 0): + rho_ate = 0.0 + rho_atte = 0.0 + else: + rho_ate = np.corrcoef((g_long - g_short), (rr_long_ate - rr_short_ate))[0, 1] + rho_atte = np.corrcoef((g_long - g_short), (rr_long_atte - rr_short_atte))[0, 1] + oracle_values = { + "g_long": g_long, + "g_short": g_short, + "m_long": m_long, + "m_short": m_short, + "gamma_a": gamma_a, + "beta_a": beta_a, + "a": a, + "y_0": y_0, + "y_1": y_1, + "z": z, + "cf_y": cf_y, + "cf_d_ate": cf_d_ate, + "cf_d_atte": cf_d_atte, + "rho_ate": rho_ate, + "rho_atte": rho_atte, + } + res_dict = {"x": x, "y": y, "d": d, "oracle_values": oracle_values} + return res_dict diff --git a/doubleml/irm/datasets/dgp_heterogeneous_data.py b/doubleml/irm/datasets/dgp_heterogeneous_data.py new file mode 100644 index 00000000..0f1a1b15 --- /dev/null +++ b/doubleml/irm/datasets/dgp_heterogeneous_data.py @@ -0,0 +1,114 @@ +import numpy as np +import pandas as pd + + +def make_heterogeneous_data(n_obs=200, p=30, support_size=5, n_x=1, binary_treatment=False): + """ + Creates a simple synthetic example for heterogeneous treatment effects. + The data generating process is based on the Monte Carlo simulation from Oprescu et al. (2019). + + The data is generated as + + .. math:: + + Y_i & = \\theta_0(X_i)D_i + \\langle X_i,\\gamma_0\\rangle + \\epsilon_i + + D_i & = \\langle X_i,\\beta_0\\rangle + \\eta_i, + + where :math:`X_i\\sim\\mathcal{U}[0,1]^{p}` and :math:`\\epsilon_i,\\eta_i + \\sim\\mathcal{U}[-1,1]`. + If the treatment is set to be binary, the treatment is generated as + + .. math:: + D_i = 1\\{\\langle X_i,\\beta_0\\rangle \\ge \\eta_i\\}. 
+ + The coefficient vectors :math:`\\gamma_0` and :math:`\\beta_0` both have small random (identical) support + which values are drawn independently from :math:`\\mathcal{U}[0,1]` and :math:`\\mathcal{U}[0,0.3]`. + Further, :math:`\\theta_0(x)` defines the conditional treatment effect, which is defined differently depending + on the dimension of :math:`x`. + + If the heterogeneity is univariate the conditional treatment effect takes the following form + + .. math:: + \\theta_0(x) = \\exp(2x_0) + 3\\sin(4x_0), + + whereas for the two-dimensional case the conditional treatment effect is defined as + + .. math:: + \\theta_0(x) = \\exp(2x_0) + 3\\sin(4x_1). + + Parameters + ---------- + n_obs : int + Number of observations to simulate. + Default is ``200``. + + p : int + Dimension of covariates. + Default is ``30``. + + support_size : int + Number of relevant (confounding) covariates. + Default is ``5``. + + n_x : int + Dimension of the heterogeneity. Can be either ``1`` or ``2``. + Default is ``1``. + + binary_treatment : bool + Indicates whether the treatment is binary. + Default is ``False``. + + Returns + ------- + res_dict : dictionary + Dictionary with entries ``data``, ``effects``, ``treatment_effect``. + + """ + # simple input checks + assert n_x in [1, 2], "n_x must be either 1 or 2." + assert support_size <= p, "support_size must be smaller than p." + assert isinstance(binary_treatment, bool), "binary_treatment must be a boolean." + + # define treatment effects + if n_x == 1: + + def treatment_effect(x): + return np.exp(2 * x[:, 0]) + 3 * np.sin(4 * x[:, 0]) + + else: + assert n_x == 2 + + # redefine treatment effect + def treatment_effect(x): + return np.exp(2 * x[:, 0]) + 3 * np.sin(4 * x[:, 1]) + + # Outcome support and coefficients + support_y = np.random.choice(np.arange(p), size=support_size, replace=False) + coefs_y = np.random.uniform(0, 1, size=support_size) + # treatment support and coefficients + support_d = support_y + coefs_d = np.random.uniform(0, 0.3, size=support_size) + + # noise + epsilon = np.random.uniform(-1, 1, size=n_obs) + eta = np.random.uniform(-1, 1, size=n_obs) + + # Generate controls, covariates, treatments and outcomes + x = np.random.uniform(0, 1, size=(n_obs, p)) + # Heterogeneous treatment effects + te = treatment_effect(x) + if binary_treatment: + d = 1.0 * (np.dot(x[:, support_d], coefs_d) >= eta) + else: + d = np.dot(x[:, support_d], coefs_d) + eta + y = te * d + np.dot(x[:, support_y], coefs_y) + epsilon + + # Now we build the dataset + y_df = pd.DataFrame({"y": y}) + d_df = pd.DataFrame({"d": d}) + x_df = pd.DataFrame(data=x, index=np.arange(x.shape[0]), columns=[f"X_{i}" for i in range(x.shape[1])]) + + data = pd.concat([y_df, d_df, x_df], axis=1) + res_dict = {"data": data, "effects": te, "treatment_effect": treatment_effect} + return res_dict diff --git a/doubleml/irm/datasets/dgp_iivm_data.py b/doubleml/irm/datasets/dgp_iivm_data.py new file mode 100644 index 00000000..e8c1130f --- /dev/null +++ b/doubleml/irm/datasets/dgp_iivm_data.py @@ -0,0 +1,102 @@ +import numpy as np +import pandas as pd +from scipy.linalg import toeplitz + +from doubleml.data import DoubleMLData +from doubleml.utils._aliases import _get_array_alias, _get_data_frame_alias, _get_dml_data_alias + +_array_alias = _get_array_alias() +_data_frame_alias = _get_data_frame_alias() +_dml_data_alias = _get_dml_data_alias() + + +def make_iivm_data(n_obs=500, dim_x=20, theta=1.0, alpha_x=0.2, return_type="DoubleMLData"): + """ + Generates data from a interactive IV regression 
(IIVM) model. + The data generating process is defined as + + .. math:: + + d_i &= 1\\left\\lbrace \\alpha_x Z + v_i > 0 \\right\\rbrace, + + y_i &= \\theta d_i + x_i' \\beta + u_i, + + with :math:`Z \\sim \\text{Bernoulli}(0.5)` and + + .. math:: + + \\left(\\begin{matrix} u_i \\\\ v_i \\end{matrix} \\right) \\sim + \\mathcal{N}\\left(0, \\left(\\begin{matrix} 1 & 0.3 \\\\ 0.3 & 1 \\end{matrix} \\right) \\right). + + The covariates :math:`x_i \\sim \\mathcal{N}(0, \\Sigma)`, where :math:`\\Sigma` is a matrix with entries + :math:`\\Sigma_{kj} = 0.5^{|j-k|}` and :math:`\\beta` is a `dim_x`-vector with entries + :math:`\\beta_j=\\frac{1}{j^2}`. + + The data generating process is inspired by a process used in the simulation experiment of Farbmacher, Gruber and + Klaassen (2020). + + Parameters + ---------- + n_obs : + The number of observations to simulate. + dim_x : + The number of covariates. + theta : + The value of the causal parameter. + alpha_x : + The value of the parameter :math:`\\alpha_x`. + return_type : + If ``'DoubleMLData'`` or ``DoubleMLData``, returns a ``DoubleMLData`` object. + + If ``'DataFrame'``, ``'pd.DataFrame'`` or ``pd.DataFrame``, returns a ``pd.DataFrame``. + + If ``'array'``, ``'np.ndarray'``, ``'np.array'`` or ``np.ndarray``, returns ``np.ndarray``'s ``(x, y, d, z)``. + + References + ---------- + Farbmacher, H., Guber, R. and Klaaßen, S. (2020). Instrument Validity Tests with Causal Forests. MEA Discussion + Paper No. 13-2020. Available at SSRN: http://dx.doi.org/10.2139/ssrn.3619201. + """ + # inspired by https://papers.ssrn.com/sol3/papers.cfm?abstract_id=3619201 + xx = np.random.multivariate_normal( + np.zeros(2), + np.array([[1.0, 0.3], [0.3, 1.0]]), + size=[ + n_obs, + ], + ) + u = xx[:, 0] + v = xx[:, 1] + + cov_mat = toeplitz([np.power(0.5, k) for k in range(dim_x)]) + x = np.random.multivariate_normal( + np.zeros(dim_x), + cov_mat, + size=[ + n_obs, + ], + ) + + beta = [1 / (k**2) for k in range(1, dim_x + 1)] + + z = np.random.binomial( + p=0.5, + n=1, + size=[ + n_obs, + ], + ) + d = 1.0 * (alpha_x * z + v > 0) + y = d * theta + np.dot(x, beta) + u + + if return_type in _array_alias: + return x, y, d, z + elif return_type in _data_frame_alias + _dml_data_alias: + x_cols = [f"X{i + 1}" for i in np.arange(dim_x)] + data = pd.DataFrame(np.column_stack((x, y, d, z)), columns=x_cols + ["y", "d", "z"]) + if return_type in _data_frame_alias: + return data + else: + return DoubleMLData(data, "y", "d", x_cols, "z") + else: + raise ValueError("Invalid return_type.") diff --git a/doubleml/irm/datasets/dgp_irm_data.py b/doubleml/irm/datasets/dgp_irm_data.py new file mode 100644 index 00000000..973902ec --- /dev/null +++ b/doubleml/irm/datasets/dgp_irm_data.py @@ -0,0 +1,103 @@ +import numpy as np +import pandas as pd +from scipy.linalg import toeplitz + +from doubleml.data import DoubleMLData +from doubleml.utils._aliases import _get_array_alias, _get_data_frame_alias, _get_dml_data_alias + +_array_alias = _get_array_alias() +_data_frame_alias = _get_data_frame_alias() +_dml_data_alias = _get_dml_data_alias() + + +def make_irm_data(n_obs=500, dim_x=20, theta=0, R2_d=0.5, R2_y=0.5, return_type="DoubleMLData"): + """ + Generates data from a interactive regression (IRM) model. + The data generating process is defined as + + .. 
math:: + + d_i &= 1\\left\\lbrace \\frac{\\exp(c_d x_i' \\beta)}{1+\\exp(c_d x_i' \\beta)} > v_i \\right\\rbrace, & &v_i + \\sim \\mathcal{U}(0,1), + + y_i &= \\theta d_i + c_y x_i' \\beta d_i + \\zeta_i, & &\\zeta_i \\sim \\mathcal{N}(0,1), + + with covariates :math:`x_i \\sim \\mathcal{N}(0, \\Sigma)`, where :math:`\\Sigma` is a matrix with entries + :math:`\\Sigma_{kj} = 0.5^{|j-k|}`. + :math:`\\beta` is a `dim_x`-vector with entries :math:`\\beta_j=\\frac{1}{j^2}` and the constants :math:`c_y` and + :math:`c_d` are given by + + .. math:: + + c_y = \\sqrt{\\frac{R_y^2}{(1-R_y^2) \\beta' \\Sigma \\beta}}, \\qquad c_d = + \\sqrt{\\frac{(\\pi^2 /3) R_d^2}{(1-R_d^2) \\beta' \\Sigma \\beta}}. + + The data generating process is inspired by a process used in the simulation experiment (see Appendix P) of Belloni + et al. (2017). + + Parameters + ---------- + n_obs : + The number of observations to simulate. + dim_x : + The number of covariates. + theta : + The value of the causal parameter. + R2_d : + The value of the parameter :math:`R_d^2`. + R2_y : + The value of the parameter :math:`R_y^2`. + return_type : + If ``'DoubleMLData'`` or ``DoubleMLData``, returns a ``DoubleMLData`` object. + + If ``'DataFrame'``, ``'pd.DataFrame'`` or ``pd.DataFrame``, returns a ``pd.DataFrame``. + + If ``'array'``, ``'np.ndarray'``, ``'np.array'`` or ``np.ndarray``, returns ``np.ndarray``'s ``(x, y, d)``. + + References + ---------- + Belloni, A., Chernozhukov, V., Fernández‐Val, I. and Hansen, C. (2017). Program Evaluation and Causal Inference With + High‐Dimensional Data. Econometrica, 85: 233-298. + """ + # inspired by https://onlinelibrary.wiley.com/doi/abs/10.3982/ECTA12723, see suplement + v = np.random.uniform( + size=[ + n_obs, + ] + ) + zeta = np.random.standard_normal( + size=[ + n_obs, + ] + ) + + cov_mat = toeplitz([np.power(0.5, k) for k in range(dim_x)]) + x = np.random.multivariate_normal( + np.zeros(dim_x), + cov_mat, + size=[ + n_obs, + ], + ) + + beta = [1 / (k**2) for k in range(1, dim_x + 1)] + b_sigma_b = np.dot(np.dot(cov_mat, beta), beta) + c_y = np.sqrt(R2_y / ((1 - R2_y) * b_sigma_b)) + c_d = np.sqrt(np.pi**2 / 3.0 * R2_d / ((1 - R2_d) * b_sigma_b)) + + xx = np.exp(np.dot(x, np.multiply(beta, c_d))) + d = 1.0 * ((xx / (1 + xx)) > v) + + y = d * theta + d * np.dot(x, np.multiply(beta, c_y)) + zeta + + if return_type in _array_alias: + return x, y, d + elif return_type in _data_frame_alias + _dml_data_alias: + x_cols = [f"X{i + 1}" for i in np.arange(dim_x)] + data = pd.DataFrame(np.column_stack((x, y, d)), columns=x_cols + ["y", "d"]) + if return_type in _data_frame_alias: + return data + else: + return DoubleMLData(data, "y", "d", x_cols) + else: + raise ValueError("Invalid return_type.") diff --git a/doubleml/irm/datasets/dgp_irm_data_discrete_treatments.py b/doubleml/irm/datasets/dgp_irm_data_discrete_treatments.py new file mode 100644 index 00000000..af621c9d --- /dev/null +++ b/doubleml/irm/datasets/dgp_irm_data_discrete_treatments.py @@ -0,0 +1,164 @@ +import numpy as np +from scipy.linalg import toeplitz + + +def make_irm_data_discrete_treatments(n_obs=200, n_levels=3, linear=False, random_state=None, **kwargs): + """ + Generates data from a interactive regression (IRM) model with multiple treatment levels (based on an + underlying continous treatment). + + The data generating process is defined as follows (similar to the Monte Carlo simulation used + in Sant'Anna and Zhao (2020)). 
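A usage sketch for ``make_irm_data`` above (seed and argument values are arbitrary; the array return type yields the raw ``(x, y, d)`` triple):

>>> import numpy as np
>>> from doubleml.irm.datasets import make_irm_data
>>> np.random.seed(3141)
>>> x, y, d = make_irm_data(n_obs=500, dim_x=20, theta=0.5, R2_d=0.5, R2_y=0.5, return_type="array")
>>> share_treated = d.mean()  # close to 0.5, since x'beta is symmetric around zero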
+ + Let :math:`X= (X_1, X_2, X_3, X_4, X_5)^T \\sim \\mathcal{N}(0, \\Sigma)`, where :math:`\\Sigma` corresponds + to the identity matrix. + Further, define :math:`Z_j = (\\tilde{Z_j} - \\mathbb{E}[\\tilde{Z}_j]) / \\sqrt{\\text{Var}(\\tilde{Z}_j)}`, + where + + .. math:: + + \\tilde{Z}_1 &= \\exp(0.5 \\cdot X_1) + + \\tilde{Z}_2 &= 10 + X_2/(1 + \\exp(X_1)) + + \\tilde{Z}_3 &= (0.6 + X_1 \\cdot X_3 / 25)^3 + + \\tilde{Z}_4 &= (20 + X_2 + X_4)^2 + + \\tilde{Z}_5 &= X_5. + + A continuous treatment :math:`D_{\\text{cont}}` is generated as + + .. math:: + + D_{\\text{cont}} = \\xi (-Z_1 + 0.5 Z_2 - 0.25 Z_3 - 0.1 Z_4) + \\varepsilon_D, + + where :math:`\\varepsilon_D \\sim \\mathcal{N}(0,1)` and :math:`\\xi=0.3`. The corresponding treatment + effect is defined as + + .. math:: + + \\theta (d) = 0.1 \\exp(d) + 10 \\sin(0.7 d) + 2 d - 0.2 d^2. + + Based on the continous treatment, a discrete treatment :math:`D` is generated as with a baseline level of + :math:`D=0` and additional levels based on the quantiles of :math:`D_{\\text{cont}}`. The number of levels + is defined by :math:`n_{\\text{levels}}`. Each level is chosen to have the same probability of being selected. + + The potential outcomes are defined as + + .. math:: + + Y(0) &= 210 + 27.4 Z_1 + 13.7 (Z_2 + Z_3 + Z_4) + \\varepsilon_Y + + Y(1) &= \\theta (D_{\\text{cont}}) 1\\{D_{\\text{cont}} > 0\\} + Y(0), + + where :math:`\\varepsilon_Y \\sim \\mathcal{N}(0,5)`. Further, the observed outcome is defined as + + .. math:: + + Y = Y(1) 1\\{D > 0\\} + Y(0) 1\\{D = 0\\}. + + The data is returned as a dictionary with the entries ``x``, ``y``, ``d`` and ``oracle_values``. + + Parameters + ---------- + n_obs : int + The number of observations to simulate. + Default is ``200``. + + n_levels : int + The number of treatment levels. + Default is ``3``. + + linear : bool + Indicates whether the true underlying regression is linear. + Default is ``False``. + + random_state : int + Random seed for reproducibility. + Default is ``42``. + + Returns + ------- + res_dict : dictionary + Dictionary with entries ``x``, ``y``, ``d`` and ``oracle_values``. + The oracle values contain the continuous treatment, the level bounds, the potential level, ITE + and the potential outcome without treatment. 
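A usage sketch (argument values are arbitrary; the oracle entries follow the Returns description above):

>>> from doubleml.irm.datasets import make_irm_data_discrete_treatments
>>> res = make_irm_data_discrete_treatments(n_obs=500, n_levels=3, linear=False, random_state=42)
>>> x, y, d = res["x"], res["y"], res["d"]         # d takes values in {0, 1, 2, 3}
>>> ite = res["oracle_values"]["ite"]              # oracle effect of the underlying continuous dose
>>> bounds = res["oracle_values"]["level_bounds"]  # quantile cut points of the continuous treatment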
+ + """ + if random_state is not None: + np.random.seed(random_state) + xi = kwargs.get("xi", 0.3) + c = kwargs.get("c", 0.0) + dim_x = kwargs.get("dim_x", 5) + + if not isinstance(n_levels, int): + raise ValueError("n_levels must be an integer.") + if n_levels < 2: + raise ValueError("n_levels must be at least 2.") + + # observed covariates + cov_mat = toeplitz([np.power(c, k) for k in range(dim_x)]) + x = np.random.multivariate_normal( + np.zeros(dim_x), + cov_mat, + size=[ + n_obs, + ], + ) + + def f_reg(w): + res = 210 + 27.4 * w[:, 0] + 13.7 * (w[:, 1] + w[:, 2] + w[:, 3]) + return res + + def f_treatment(w, xi): + res = xi * (-w[:, 0] + 0.5 * w[:, 1] - 0.25 * w[:, 2] - 0.1 * w[:, 3]) + return res + + def treatment_effect(d, scale=15): + return scale * (1 / (1 + np.exp(-d - 1.2 * np.cos(d)))) - 2 + + z_tilde_1 = np.exp(0.5 * x[:, 0]) + z_tilde_2 = 10 + x[:, 1] / (1 + np.exp(x[:, 0])) + z_tilde_3 = (0.6 + x[:, 0] * x[:, 2] / 25) ** 3 + z_tilde_4 = (20 + x[:, 1] + x[:, 3]) ** 2 + + z_tilde = np.column_stack((z_tilde_1, z_tilde_2, z_tilde_3, z_tilde_4, x[:, 4:])) + z = (z_tilde - np.mean(z_tilde, axis=0)) / np.std(z_tilde, axis=0) + + # error terms + var_eps_y = 5 + eps_y = np.random.normal(loc=0, scale=np.sqrt(var_eps_y), size=n_obs) + var_eps_d = 1 + eps_d = np.random.normal(loc=0, scale=np.sqrt(var_eps_d), size=n_obs) + + if linear: + g = f_reg(x) + m = f_treatment(x, xi) + else: + assert not linear + g = f_reg(z) + m = f_treatment(z, xi) + + cont_d = m + eps_d + level_bounds = np.quantile(cont_d, q=np.linspace(0, 1, n_levels + 1)) + potential_level = sum([1.0 * (cont_d >= bound) for bound in level_bounds[1:-1]]) + 1 + eta = np.random.uniform(0, 1, size=n_obs) + d = 1.0 * (eta >= 1 / n_levels) * potential_level + + ite = treatment_effect(cont_d) + y0 = g + eps_y + # only treated for d > 0 compared to the baseline + y = ite * (d > 0) + y0 + + oracle_values = { + "cont_d": cont_d, + "level_bounds": level_bounds, + "potential_level": potential_level, + "ite": ite, + "y0": y0, + } + + resul_dict = {"x": x, "y": y, "d": d, "oracle_values": oracle_values} + + return resul_dict diff --git a/doubleml/irm/datasets/dgp_ssm_data.py b/doubleml/irm/datasets/dgp_ssm_data.py new file mode 100644 index 00000000..6a6a5bee --- /dev/null +++ b/doubleml/irm/datasets/dgp_ssm_data.py @@ -0,0 +1,102 @@ +import numpy as np +import pandas as pd +from scipy.linalg import toeplitz + +from doubleml.data import DoubleMLData +from doubleml.utils._aliases import _get_array_alias, _get_data_frame_alias, _get_dml_data_alias + +_array_alias = _get_array_alias() +_data_frame_alias = _get_data_frame_alias() +_dml_data_alias = _get_dml_data_alias() + + +def make_ssm_data(n_obs=8000, dim_x=100, theta=1, mar=True, return_type="DoubleMLData"): + """ + Generates data from a sample selection model (SSM). + The data generating process is defined as + + .. math:: + + y_i &= \\theta d_i + x_i' \\beta d_i + u_i, + + s_i &= 1\\left\\lbrace d_i + \\gamma z_i + x_i' \\beta + v_i > 0 \\right\\rbrace, + + d_i &= 1\\left\\lbrace x_i' \\beta + w_i > 0 \\right\\rbrace, + + with Y being observed if :math:`s_i = 1` and covariates :math:`x_i \\sim \\mathcal{N}(0, \\Sigma^2_x)`, where + :math:`\\Sigma^2_x` is a matrix with entries + :math:`\\Sigma_{kj} = 0.5^{|j-k|}`. + :math:`\\beta` is a `dim_x`-vector with entries :math:`\\beta_j=\\frac{0.4}{j^2}` + :math:`z_i \\sim \\mathcal{N}(0, 1)`, + :math:`(u_i,v_i) \\sim \\mathcal{N}(0, \\Sigma^2_{u,v})`, + :math:`w_i \\sim \\mathcal{N}(0, 1)`. 
+ + + The data generating process is inspired by a process used in the simulation study (see Appendix E) of Bia, + Huber and Lafférs (2023). + + Parameters + ---------- + n_obs : + The number of observations to simulate. + dim_x : + The number of covariates. + theta : + The value of the causal parameter. + mar: + Boolean. Indicates whether missingness at random holds. + return_type : + If ``'DoubleMLData'`` or ``DoubleMLData``, returns a ``DoubleMLData`` object. + + If ``'DataFrame'``, ``'pd.DataFrame'`` or ``pd.DataFrame``, returns a ``pd.DataFrame``. + + If ``'array'``, ``'np.ndarray'``, ``'np.array'`` or ``np.ndarray``, returns ``np.ndarray``'s ``(x, y, d, z, s)``. + + References + ---------- + Michela Bia, Martin Huber & Lukáš Lafférs (2023) Double Machine Learning for Sample Selection Models, + Journal of Business & Economic Statistics, DOI: 10.1080/07350015.2023.2271071 + """ + if mar: + sigma = np.array([[1, 0], [0, 1]]) + gamma = 0 + else: + sigma = np.array([[1, 0.8], [0.8, 1]]) + gamma = 1 + + e = np.random.multivariate_normal(mean=[0, 0], cov=sigma, size=n_obs).T + + cov_mat = toeplitz([np.power(0.5, k) for k in range(dim_x)]) + x = np.random.multivariate_normal( + np.zeros(dim_x), + cov_mat, + size=[ + n_obs, + ], + ) + + beta = [0.4 / (k**2) for k in range(1, dim_x + 1)] + + d = np.where(np.dot(x, beta) + np.random.randn(n_obs) > 0, 1, 0) + z = np.random.randn(n_obs) + s = np.where(np.dot(x, beta) + d + gamma * z + e[0] > 0, 1, 0) + + y = np.dot(x, beta) + theta * d + e[1] + y[s == 0] = 0 + + if return_type in _array_alias: + return x, y, d, z, s + elif return_type in _data_frame_alias + _dml_data_alias: + x_cols = [f"X{i + 1}" for i in np.arange(dim_x)] + if mar: + data = pd.DataFrame(np.column_stack((x, y, d, s)), columns=x_cols + ["y", "d", "s"]) + else: + data = pd.DataFrame(np.column_stack((x, y, d, z, s)), columns=x_cols + ["y", "d", "z", "s"]) + if return_type in _data_frame_alias: + return data + else: + if mar: + return DoubleMLData(data, "y", "d", x_cols, None, None, "s") + return DoubleMLData(data, "y", "d", x_cols, "z", None, "s") + else: + raise ValueError("Invalid return_type.") diff --git a/doubleml/plm/datasets/__init__.py b/doubleml/plm/datasets/__init__.py new file mode 100644 index 00000000..f8928902 --- /dev/null +++ b/doubleml/plm/datasets/__init__.py @@ -0,0 +1,20 @@ +""" +The :mod:`doubleml.plm.datasets` module implements data generating processes for partially linear models. +""" + +from .dgp_plr_CCDDHNR2018 import make_plr_CCDDHNR2018 +from .dgp_plr_turrell2018 import make_plr_turrell2018 +from .dgp_confounded_plr_data import make_confounded_plr_data +from .dgp_pliv_CHS2015 import make_pliv_CHS2015 +from .dgp_pliv_multiway_cluster_CKMS2021 import make_pliv_multiway_cluster_CKMS2021 +from ._make_pliv_data import _make_pliv_data + + +__all__ = [ + "make_plr_CCDDHNR2018", + "make_plr_turrell2018", + "make_confounded_plr_data", + "make_pliv_CHS2015", + "make_pliv_multiway_cluster_CKMS2021", + "_make_pliv_data", +] diff --git a/doubleml/plm/datasets/_make_pliv_data.py b/doubleml/plm/datasets/_make_pliv_data.py new file mode 100644 index 00000000..deb7cc53 --- /dev/null +++ b/doubleml/plm/datasets/_make_pliv_data.py @@ -0,0 +1,70 @@ +""" +Helper function for partially linear IV data generation. 
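The new ``doubleml.plm.datasets`` module above collects the partially linear generators; a minimal import-and-call sketch (argument values arbitrary):

>>> import numpy as np
>>> from doubleml.plm.datasets import make_plr_CCDDHNR2018, make_pliv_multiway_cluster_CKMS2021
>>> np.random.seed(3141)
>>> dml_data = make_plr_CCDDHNR2018(n_obs=500, dim_x=20, alpha=0.5)     # returns a DoubleMLData object
>>> dml_cluster_data = make_pliv_multiway_cluster_CKMS2021(N=25, M=25)  # returns a DoubleMLClusterData object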
+""" + +import numpy as np +import pandas as pd +from sklearn.datasets import make_spd_matrix + +from doubleml.data import DoubleMLData +from doubleml.utils._aliases import _get_array_alias, _get_data_frame_alias, _get_dml_data_alias + +_array_alias = _get_array_alias() +_data_frame_alias = _get_data_frame_alias() +_dml_data_alias = _get_dml_data_alias() + + +def _g(x): + return np.power(np.sin(x), 2) + + +def _m(x, nu=0.0, gamma=1.0): + return 0.5 / np.pi * (np.sinh(gamma)) / (np.cosh(gamma) - np.cos(x - nu)) + + +def _make_pliv_data(n_obs=100, dim_x=20, theta=0.5, gamma_z=0.4, return_type="DoubleMLData"): + b = [1 / k for k in range(1, dim_x + 1)] + sigma = make_spd_matrix(dim_x) + + x = np.random.multivariate_normal( + np.zeros(dim_x), + sigma, + size=[ + n_obs, + ], + ) + G = _g(np.dot(x, b)) + # instrument + z = _m(np.dot(x, b)) + np.random.standard_normal( + size=[ + n_obs, + ] + ) + # treatment + M = _m(gamma_z * z + np.dot(x, b)) + d = M + np.random.standard_normal( + size=[ + n_obs, + ] + ) + y = ( + np.dot(theta, d) + + G + + np.random.standard_normal( + size=[ + n_obs, + ] + ) + ) + + if return_type in _array_alias: + return x, y, d, z + elif return_type in _data_frame_alias + _dml_data_alias: + x_cols = [f"X{i + 1}" for i in np.arange(dim_x)] + data = pd.DataFrame(np.column_stack((x, y, d, z)), columns=x_cols + ["y", "d", "z"]) + if return_type in _data_frame_alias: + return data + else: + return DoubleMLData(data, "y", "d", x_cols, "z") + else: + raise ValueError("Invalid return_type.") diff --git a/doubleml/plm/datasets/dgp_confounded_plr_data.py b/doubleml/plm/datasets/dgp_confounded_plr_data.py new file mode 100644 index 00000000..794e3db1 --- /dev/null +++ b/doubleml/plm/datasets/dgp_confounded_plr_data.py @@ -0,0 +1,171 @@ +import numpy as np +from scipy.linalg import toeplitz +from scipy.optimize import minimize_scalar + + +def make_confounded_plr_data(n_obs=500, theta=5.0, cf_y=0.04, cf_d=0.04, **kwargs): + """ + Generates counfounded data from an partially linear regression model. + + The data generating process is defined as follows (similar to the Monte Carlo simulation used + in Sant'Anna and Zhao (2020)). Let :math:`X= (X_1, X_2, X_3, X_4, X_5)^T \\sim \\mathcal{N}(0, \\Sigma)`, + where :math:`\\Sigma` is a matrix with entries + :math:`\\Sigma_{kj} = c^{|j-k|}`. The default value is :math:`c = 0`, corresponding to the identity matrix. + Further, define :math:`Z_j = (\\tilde{Z_j} - \\mathbb{E}[\\tilde{Z}_j]) / \\sqrt{\\text{Var}(\\tilde{Z}_j)}`, + where + + .. math:: + + \\tilde{Z}_1 &= \\exp(0.5 \\cdot X_1) + + \\tilde{Z}_2 &= 10 + X_2/(1 + \\exp(X_1)) + + \\tilde{Z}_3 &= (0.6 + X_1 \\cdot X_3 / 25)^3 + + \\tilde{Z}_4 &= (20 + X_2 + X_4)^2. + + Additionally, generate a confounder :math:`A \\sim \\mathcal{U}[-1, 1]`. + At first, define the treatment as + + .. math:: + + D = -Z_1 + 0.5 \\cdot Z_2 - 0.25 \\cdot Z_3 - 0.1 \\cdot Z_4 + \\gamma_A \\cdot A + \\varepsilon_D + + and with :math:`\\varepsilon \\sim \\mathcal{N}(0,1)`. + Since :math:`A` is independent of :math:`X`, the long and short form of the treatment regression are given as + + .. math:: + + E[D|X,A] = -Z_1 + 0.5 \\cdot Z_2 - 0.25 \\cdot Z_3 - 0.1 \\cdot Z_4 + \\gamma_A \\cdot A + + E[D|X] = -Z_1 + 0.5 \\cdot Z_2 - 0.25 \\cdot Z_3 - 0.1 \\cdot Z_4. + + Further, generate the outcome of interest :math:`Y` as + + .. math:: + + Y &= \\theta \\cdot D + g(Z) + \\beta_A \\cdot A + \\varepsilon + + g(Z) &= 210 + 27.4 \\cdot Z_1 +13.7 \\cdot (Z_2 + Z_3 + Z_4) + + where :math:`\\varepsilon \\sim \\mathcal{N}(0,5)`. 
+ This implies an average treatment effect of :math:`\\theta`. Additionally, the long and short forms of + the conditional expectation take the following forms + + .. math:: + + \\mathbb{E}[Y|D, X, A] &= \\theta \\cdot D + g(Z) + \\beta_A \\cdot A + + \\mathbb{E}[Y|D, X] &= (\\theta + \\gamma_A\\beta_A \\frac{\\mathrm{Var}(A)}{\\mathrm{Var}(D)}) \\cdot D + g(Z). + + Consequently, the strength of confounding is determined via :math:`\\gamma_A` and :math:`\\beta_A`. + Both are chosen to obtain the desired confounding of the outcome and Riesz Representer (in sample). + + The observed data is given as :math:`W = (Y, D, X)`. + Further, orcale values of the confounder :math:`A`, the transformed covariated :math:`Z`, the effect :math:`\\theta`, + the coefficients :math:`\\gamma_a`, :math:`\\beta_a`, the long and short forms of the main regression and + the propensity score are returned in a dictionary. + + Parameters + ---------- + n_obs : int + The number of observations to simulate. + Default is ``500``. + theta : float or int + Average treatment effect. + Default is ``5.0``. + cf_y : float + Percentage of the residual variation of the outcome explained by latent/confounding variable. + Default is ``0.04``. + cf_d : float + Percentage gains in the variation of the Riesz Representer generated by latent/confounding variable. + Default is ``0.04``. + + Returns + ------- + res_dict : dictionary + Dictionary with entries ``x``, ``y``, ``d`` and ``oracle_values``. + + References + ---------- + Sant'Anna, P. H. and Zhao, J. (2020), + Doubly robust difference-in-differences estimators. Journal of Econometrics, 219(1), 101-122. + doi:`10.1016/j.jeconom.2020.06.003 `_. + """ + c = kwargs.get("c", 0.0) + dim_x = kwargs.get("dim_x", 4) + + # observed covariates + cov_mat = toeplitz([np.power(c, k) for k in range(dim_x)]) + x = np.random.multivariate_normal( + np.zeros(dim_x), + cov_mat, + size=[ + n_obs, + ], + ) + + z_tilde_1 = np.exp(0.5 * x[:, 0]) + z_tilde_2 = 10 + x[:, 1] / (1 + np.exp(x[:, 0])) + z_tilde_3 = (0.6 + x[:, 0] * x[:, 2] / 25) ** 3 + z_tilde_4 = (20 + x[:, 1] + x[:, 3]) ** 2 + + z_tilde = np.column_stack((z_tilde_1, z_tilde_2, z_tilde_3, z_tilde_4, x[:, 4:])) + z = (z_tilde - np.mean(z_tilde, axis=0)) / np.std(z_tilde, axis=0) + + # error terms + var_eps_y = 5 + eps_y = np.random.normal(loc=0, scale=np.sqrt(var_eps_y), size=n_obs) + var_eps_d = 1 + eps_d = np.random.normal(loc=0, scale=np.sqrt(var_eps_d), size=n_obs) + + # unobserved confounder + a_bounds = (-1, 1) + a = np.random.uniform(low=a_bounds[0], high=a_bounds[1], size=n_obs) + var_a = np.square(a_bounds[1] - a_bounds[0]) / 12 + + # get the required impact of the confounder on the propensity score + m_short = -z[:, 0] + 0.5 * z[:, 1] - 0.25 * z[:, 2] - 0.1 * z[:, 3] + + def f_m(gamma_a): + rr_long = eps_d / var_eps_d + rr_short = (gamma_a * a + eps_d) / (gamma_a**2 * var_a + var_eps_d) + C2_D = (np.mean(np.square(rr_long)) - np.mean(np.square(rr_short))) / np.mean(np.square(rr_short)) + return np.square(C2_D / (1 + C2_D) - cf_d) + + gamma_a = minimize_scalar(f_m).x + m_long = m_short + gamma_a * a + d = m_long + eps_d + + # short and long version of g + g_partial_reg = 210 + 27.4 * z[:, 0] + 13.7 * (z[:, 1] + z[:, 2] + z[:, 3]) + + var_d = np.var(d) + + def f_g(beta_a): + g_diff = beta_a * (a - gamma_a * (var_a / var_d) * d) + y_diff = eps_y + g_diff + return np.square(np.mean(np.square(g_diff)) / np.mean(np.square(y_diff)) - cf_y) + + beta_a = minimize_scalar(f_g).x + + g_long = theta * d + g_partial_reg + beta_a * a + g_short = 
(theta + gamma_a * beta_a * var_a / var_d) * d + g_partial_reg + + y = g_long + eps_y + + oracle_values = { + "g_long": g_long, + "g_short": g_short, + "m_long": m_long, + "m_short": m_short, + "theta": theta, + "gamma_a": gamma_a, + "beta_a": beta_a, + "a": a, + "z": z, + } + + res_dict = {"x": x, "y": y, "d": d, "oracle_values": oracle_values} + + return res_dict diff --git a/doubleml/plm/datasets/dgp_pliv_CHS2015.py b/doubleml/plm/datasets/dgp_pliv_CHS2015.py new file mode 100644 index 00000000..7542803a --- /dev/null +++ b/doubleml/plm/datasets/dgp_pliv_CHS2015.py @@ -0,0 +1,108 @@ +import numpy as np +import pandas as pd +from scipy.linalg import toeplitz + +from doubleml.data import DoubleMLData +from doubleml.utils._aliases import _array_alias, _data_frame_alias, _dml_data_alias + + +def make_pliv_CHS2015(n_obs, alpha=1.0, dim_x=200, dim_z=150, return_type="DoubleMLData"): + """ + Generates data from a partially linear IV regression model used in Chernozhukov, Hansen and Spindler (2015). + The data generating process is defined as + + .. math:: + + z_i &= \\Pi x_i + \\zeta_i, + + d_i &= x_i' \\gamma + z_i' \\delta + u_i, + + y_i &= \\alpha d_i + x_i' \\beta + \\varepsilon_i, + + with + + .. math:: + + \\left(\\begin{matrix} \\varepsilon_i \\\\ u_i \\\\ \\zeta_i \\\\ x_i \\end{matrix} \\right) \\sim + \\mathcal{N}\\left(0, \\left(\\begin{matrix} 1 & 0.6 & 0 & 0 \\\\ 0.6 & 1 & 0 & 0 \\\\ + 0 & 0 & 0.25 I_{p_n^z} & 0 \\\\ 0 & 0 & 0 & \\Sigma \\end{matrix} \\right) \\right) + + where :math:`\\Sigma` is a :math:`p_n^x \\times p_n^x` matrix with entries + :math:`\\Sigma_{kj} = 0.5^{|j-k|}` and :math:`I_{p_n^z}` is the :math:`p_n^z \\times p_n^z` identity matrix. + :math:`\\beta = \\gamma` is a :math:`p_n^x`-vector with entries :math:`\\beta_j=\\frac{1}{j^2}`, + :math:`\\delta` is a :math:`p_n^z`-vector with entries :math:`\\delta_j=\\frac{1}{j^2}` + and :math:`\\Pi = (I_{p_n^z}, 0_{p_n^z \\times (p_n^x - p_n^z)})`. + + Parameters + ---------- + n_obs : + The number of observations to simulate. + alpha : + The value of the causal parameter. + dim_x : + The number of covariates. + dim_z : + The number of instruments. + return_type : + If ``'DoubleMLData'`` or ``DoubleMLData``, returns a ``DoubleMLData`` object. + + If ``'DataFrame'``, ``'pd.DataFrame'`` or ``pd.DataFrame``, returns a ``pd.DataFrame``. + + If ``'array'``, ``'np.ndarray'``, ``'np.array'`` or ``np.ndarray``, returns ``np.ndarray``'s ``(x, y, d, z)``. + + References + ---------- + Chernozhukov, V., Hansen, C. and Spindler, M. (2015), Post-Selection and Post-Regularization Inference in Linear + Models with Many Controls and Instruments. American Economic Review: Papers and Proceedings, 105 (5): 486-90. 
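A usage sketch for ``make_pliv_CHS2015`` (``n_obs`` is required; the reduced dimensions relative to the defaults are arbitrary choices to keep the example fast):

>>> import numpy as np
>>> from doubleml.plm.datasets import make_pliv_CHS2015
>>> np.random.seed(3141)
>>> dml_data = make_pliv_CHS2015(n_obs=500, alpha=1.0, dim_x=20, dim_z=15)  # DoubleMLData with instruments Z1, ..., Z15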
+ """ + assert dim_x >= dim_z + # see https://assets.aeaweb.org/asset-server/articles-attachments/aer/app/10505/P2015_1022_app.pdf + xx = np.random.multivariate_normal( + np.zeros(2), + np.array([[1.0, 0.6], [0.6, 1.0]]), + size=[ + n_obs, + ], + ) + epsilon = xx[:, 0] + u = xx[:, 1] + + sigma = toeplitz([np.power(0.5, k) for k in range(0, dim_x)]) + x = np.random.multivariate_normal( + np.zeros(dim_x), + sigma, + size=[ + n_obs, + ], + ) + + I_z = np.eye(dim_z) + xi = np.random.multivariate_normal( + np.zeros(dim_z), + 0.25 * I_z, + size=[ + n_obs, + ], + ) + + beta = [1 / (k**2) for k in range(1, dim_x + 1)] + gamma = beta + delta = [1 / (k**2) for k in range(1, dim_z + 1)] + Pi = np.hstack((I_z, np.zeros((dim_z, dim_x - dim_z)))) + + z = np.dot(x, np.transpose(Pi)) + xi + d = np.dot(x, gamma) + np.dot(z, delta) + u + y = alpha * d + np.dot(x, beta) + epsilon + + if return_type in _array_alias: + return x, y, d, z + elif return_type in _data_frame_alias + _dml_data_alias: + x_cols = [f"X{i + 1}" for i in np.arange(dim_x)] + z_cols = [f"Z{i + 1}" for i in np.arange(dim_z)] + data = pd.DataFrame(np.column_stack((x, y, d, z)), columns=x_cols + ["y", "d"] + z_cols) + if return_type in _data_frame_alias: + return data + else: + return DoubleMLData(data, "y", "d", x_cols, z_cols) + else: + raise ValueError("Invalid return_type.") diff --git a/doubleml/plm/datasets/dgp_pliv_multiway_cluster_CKMS2021.py b/doubleml/plm/datasets/dgp_pliv_multiway_cluster_CKMS2021.py new file mode 100644 index 00000000..df2b4cbe --- /dev/null +++ b/doubleml/plm/datasets/dgp_pliv_multiway_cluster_CKMS2021.py @@ -0,0 +1,199 @@ +import numpy as np +import pandas as pd +from scipy.linalg import toeplitz + +from doubleml.data import DoubleMLClusterData +from doubleml.utils._aliases import _array_alias, _data_frame_alias, _dml_cluster_data_alias + + +def make_pliv_multiway_cluster_CKMS2021(N=25, M=25, dim_X=100, theta=1.0, return_type="DoubleMLClusterData", **kwargs): + """ + Generates data from a partially linear IV regression model with multiway cluster sample used in Chiang et al. + (2021). The data generating process is defined as + + .. math:: + + Z_{ij} &= X_{ij}' \\xi_0 + V_{ij}, + + D_{ij} &= Z_{ij}' \\pi_{10} + X_{ij}' \\pi_{20} + v_{ij}, + + Y_{ij} &= D_{ij} \\theta + X_{ij}' \\zeta_0 + \\varepsilon_{ij}, + + with + + .. math:: + + X_{ij} &= (1 - \\omega_1^X - \\omega_2^X) \\alpha_{ij}^X + + \\omega_1^X \\alpha_{i}^X + \\omega_2^X \\alpha_{j}^X, + + \\varepsilon_{ij} &= (1 - \\omega_1^\\varepsilon - \\omega_2^\\varepsilon) \\alpha_{ij}^\\varepsilon + + \\omega_1^\\varepsilon \\alpha_{i}^\\varepsilon + \\omega_2^\\varepsilon \\alpha_{j}^\\varepsilon, + + v_{ij} &= (1 - \\omega_1^v - \\omega_2^v) \\alpha_{ij}^v + + \\omega_1^v \\alpha_{i}^v + \\omega_2^v \\alpha_{j}^v, + + V_{ij} &= (1 - \\omega_1^V - \\omega_2^V) \\alpha_{ij}^V + + \\omega_1^V \\alpha_{i}^V + \\omega_2^V \\alpha_{j}^V, + + and :math:`\\alpha_{ij}^X, \\alpha_{i}^X, \\alpha_{j}^X \\sim \\mathcal{N}(0, \\Sigma)` + where :math:`\\Sigma` is a :math:`p_x \\times p_x` matrix with entries + :math:`\\Sigma_{kj} = s_X^{|j-k|}`. + Further + + .. 
math:: + + \\left(\\begin{matrix} \\alpha_{ij}^\\varepsilon \\\\ \\alpha_{ij}^v \\end{matrix}\\right), + \\left(\\begin{matrix} \\alpha_{i}^\\varepsilon \\\\ \\alpha_{i}^v \\end{matrix}\\right), + \\left(\\begin{matrix} \\alpha_{j}^\\varepsilon \\\\ \\alpha_{j}^v \\end{matrix}\\right) + \\sim \\mathcal{N}\\left(0, \\left(\\begin{matrix} 1 & s_{\\varepsilon v} \\\\ + s_{\\varepsilon v} & 1 \\end{matrix} \\right) \\right) + + + and :math:`\\alpha_{ij}^V, \\alpha_{i}^V, \\alpha_{j}^V \\sim \\mathcal{N}(0, 1)`. + + Parameters + ---------- + N : + The number of observations (first dimension). + M : + The number of observations (second dimension). + dim_X : + The number of covariates. + theta : + The value of the causal parameter. + return_type : + If ``'DoubleMLClusterData'`` or ``DoubleMLClusterData``, returns a ``DoubleMLClusterData`` object where + ``DoubleMLClusterData.data`` is a ``pd.DataFrame``. + + If ``'DataFrame'``, ``'pd.DataFrame'`` or ``pd.DataFrame``, returns a ``pd.DataFrame``. + + If ``'array'``, ``'np.ndarray'``, ``'np.array'`` or ``np.ndarray``, returns ``np.ndarray``'s + ``(x, y, d, cluster_vars, z)``. + **kwargs + Additional keyword arguments to set non-default values for the parameters + :math:`\\pi_{10}=1.0`, :math:`\\omega_X = \\omega_{\\varepsilon} = \\omega_V = \\omega_v = (0.25, 0.25)`, + :math:`s_X = s_{\\varepsilon v} = 0.25`, + or the :math:`p_x`-vectors :math:`\\zeta_0 = \\pi_{20} = \\xi_0` with default entries + :math:`(\\zeta_{0})_j = 0.5^j`. + + References + ---------- + Chiang, H. D., Kato K., Ma, Y. and Sasaki, Y. (2021), Multiway Cluster Robust Double/Debiased Machine Learning, + Journal of Business & Economic Statistics, + doi: `10.1080/07350015.2021.1895815 `_, + arXiv:`1909.03489 `_. + """ + # additional parameters specifiable via kwargs + pi_10 = kwargs.get("pi_10", 1.0) + + xx = np.arange(1, dim_X + 1) + zeta_0 = kwargs.get("zeta_0", np.power(0.5, xx)) + pi_20 = kwargs.get("pi_20", np.power(0.5, xx)) + xi_0 = kwargs.get("xi_0", np.power(0.5, xx)) + + omega_X = kwargs.get("omega_X", np.array([0.25, 0.25])) + omega_epsilon = kwargs.get("omega_epsilon", np.array([0.25, 0.25])) + omega_v = kwargs.get("omega_v", np.array([0.25, 0.25])) + omega_V = kwargs.get("omega_V", np.array([0.25, 0.25])) + + s_X = kwargs.get("s_X", 0.25) + s_epsilon_v = kwargs.get("s_epsilon_v", 0.25) + + # use np.tile() and np.repeat() for repeating vectors in different styles, i.e., + # np.tile([v1, v2, v3], 2) [v1, v2, v3, v1, v2, v3] + # np.repeat([v1, v2, v3], 2) [v1, v1, v2, v2, v3, v3] + + alpha_V = np.random.normal(size=(N * M)) + alpha_V_i = np.repeat(np.random.normal(size=N), M) + alpha_V_j = np.tile(np.random.normal(size=M), N) + + cov_mat = np.array([[1, s_epsilon_v], [s_epsilon_v, 1]]) + alpha_eps_v = np.random.multivariate_normal( + np.zeros(2), + cov_mat, + size=[ + N * M, + ], + ) + alpha_eps = alpha_eps_v[:, 0] + alpha_v = alpha_eps_v[:, 1] + + alpha_eps_v_i = np.random.multivariate_normal( + np.zeros(2), + cov_mat, + size=[ + N, + ], + ) + alpha_eps_i = np.repeat(alpha_eps_v_i[:, 0], M) + alpha_v_i = np.repeat(alpha_eps_v_i[:, 1], M) + + alpha_eps_v_j = np.random.multivariate_normal( + np.zeros(2), + cov_mat, + size=[ + M, + ], + ) + alpha_eps_j = np.tile(alpha_eps_v_j[:, 0], N) + alpha_v_j = np.tile(alpha_eps_v_j[:, 1], N) + + cov_mat = toeplitz([np.power(s_X, k) for k in range(dim_X)]) + alpha_X = np.random.multivariate_normal( + np.zeros(dim_X), + cov_mat, + size=[ + N * M, + ], + ) + alpha_X_i = np.repeat( + np.random.multivariate_normal( + np.zeros(dim_X), + 
cov_mat, + size=[ + N, + ], + ), + M, + axis=0, + ) + alpha_X_j = np.tile( + np.random.multivariate_normal( + np.zeros(dim_X), + cov_mat, + size=[ + M, + ], + ), + (N, 1), + ) + + # generate variables + x = (1 - omega_X[0] - omega_X[1]) * alpha_X + omega_X[0] * alpha_X_i + omega_X[1] * alpha_X_j + + eps = ( + (1 - omega_epsilon[0] - omega_epsilon[1]) * alpha_eps + omega_epsilon[0] * alpha_eps_i + omega_epsilon[1] * alpha_eps_j + ) + + v = (1 - omega_v[0] - omega_v[1]) * alpha_v + omega_v[0] * alpha_v_i + omega_v[1] * alpha_v_j + + V = (1 - omega_V[0] - omega_V[1]) * alpha_V + omega_V[0] * alpha_V_i + omega_V[1] * alpha_V_j + + z = np.matmul(x, xi_0) + V + d = z * pi_10 + np.matmul(x, pi_20) + v + y = d * theta + np.matmul(x, zeta_0) + eps + + cluster_cols = ["cluster_var_i", "cluster_var_j"] + cluster_vars = pd.MultiIndex.from_product([range(N), range(M)]).to_frame(name=cluster_cols).reset_index(drop=True) + + if return_type in _array_alias: + return x, y, d, cluster_vars.values, z + elif return_type in _data_frame_alias + _dml_cluster_data_alias: + x_cols = [f"X{i + 1}" for i in np.arange(dim_X)] + data = pd.concat((cluster_vars, pd.DataFrame(np.column_stack((x, y, d, z)), columns=x_cols + ["Y", "D", "Z"])), axis=1) + if return_type in _data_frame_alias: + return data + else: + return DoubleMLClusterData(data, "Y", "D", cluster_cols, x_cols, "Z") + else: + raise ValueError("Invalid return_type.") diff --git a/doubleml/plm/datasets/dgp_plr_CCDDHNR2018.py b/doubleml/plm/datasets/dgp_plr_CCDDHNR2018.py new file mode 100644 index 00000000..7d6fdf9e --- /dev/null +++ b/doubleml/plm/datasets/dgp_plr_CCDDHNR2018.py @@ -0,0 +1,108 @@ +import numpy as np +import pandas as pd +from scipy.linalg import toeplitz + +from doubleml.data import DoubleMLData +from doubleml.utils._aliases import _get_array_alias, _get_data_frame_alias, _get_dml_data_alias + +_array_alias = _get_array_alias() +_data_frame_alias = _get_data_frame_alias() +_dml_data_alias = _get_dml_data_alias() + + +def make_plr_CCDDHNR2018(n_obs=500, dim_x=20, alpha=0.5, return_type="DoubleMLData", **kwargs): + """ + Generates data from a partially linear regression model used in Chernozhukov et al. (2018) for Figure 1. + The data generating process is defined as + + .. math:: + + d_i &= m_0(x_i) + s_1 v_i, & &v_i \\sim \\mathcal{N}(0,1), + + y_i &= \\alpha d_i + g_0(x_i) + s_2 \\zeta_i, & &\\zeta_i \\sim \\mathcal{N}(0,1), + + + with covariates :math:`x_i \\sim \\mathcal{N}(0, \\Sigma)`, where :math:`\\Sigma` is a matrix with entries + :math:`\\Sigma_{kj} = 0.7^{|j-k|}`. + The nuisance functions are given by + + .. math:: + + m_0(x_i) &= a_0 x_{i,1} + a_1 \\frac{\\exp(x_{i,3})}{1+\\exp(x_{i,3})}, + + g_0(x_i) &= b_0 \\frac{\\exp(x_{i,1})}{1+\\exp(x_{i,1})} + b_1 x_{i,3}. + + Parameters + ---------- + n_obs : + The number of observations to simulate. + dim_x : + The number of covariates. + alpha : + The value of the causal parameter. + return_type : + If ``'DoubleMLData'`` or ``DoubleMLData``, returns a ``DoubleMLData`` object. + + If ``'DataFrame'``, ``'pd.DataFrame'`` or ``pd.DataFrame``, returns a ``pd.DataFrame``. + + If ``'array'``, ``'np.ndarray'``, ``'np.array'`` or ``np.ndarray``, returns ``np.ndarray``'s ``(x, y, d)``. + **kwargs + Additional keyword arguments to set non-default values for the parameters + :math:`a_0=1`, :math:`a_1=0.25`, :math:`s_1=1`, :math:`b_0=1`, :math:`b_1=0.25` or :math:`s_2=1`. + + References + ---------- + Chernozhukov, V., Chetverikov, D., Demirer, M., Duflo, E., Hansen, C., Newey, W. and Robins, J. 
(2018), + Double/debiased machine learning for treatment and structural parameters. The Econometrics Journal, 21: C1-C68. + doi:`10.1111/ectj.12097 `_. + """ + a_0 = kwargs.get("a_0", 1.0) + a_1 = kwargs.get("a_1", 0.25) + s_1 = kwargs.get("s_1", 1.0) + + b_0 = kwargs.get("b_0", 1.0) + b_1 = kwargs.get("b_1", 0.25) + s_2 = kwargs.get("s_2", 1.0) + + cov_mat = toeplitz([np.power(0.7, k) for k in range(dim_x)]) + x = np.random.multivariate_normal( + np.zeros(dim_x), + cov_mat, + size=[ + n_obs, + ], + ) + + d = ( + a_0 * x[:, 0] + + a_1 * np.divide(np.exp(x[:, 2]), 1 + np.exp(x[:, 2])) + + s_1 + * np.random.standard_normal( + size=[ + n_obs, + ] + ) + ) + y = ( + alpha * d + + b_0 * np.divide(np.exp(x[:, 0]), 1 + np.exp(x[:, 0])) + + b_1 * x[:, 2] + + s_2 + * np.random.standard_normal( + size=[ + n_obs, + ] + ) + ) + + if return_type in _array_alias: + return x, y, d + elif return_type in _data_frame_alias + _dml_data_alias: + x_cols = [f"X{i + 1}" for i in np.arange(dim_x)] + data = pd.DataFrame(np.column_stack((x, y, d)), columns=x_cols + ["y", "d"]) + if return_type in _data_frame_alias: + return data + else: + return DoubleMLData(data, "y", "d", x_cols) + else: + raise ValueError("Invalid return_type.") diff --git a/doubleml/plm/datasets/dgp_plr_turrell2018.py b/doubleml/plm/datasets/dgp_plr_turrell2018.py new file mode 100644 index 00000000..5cfefdd8 --- /dev/null +++ b/doubleml/plm/datasets/dgp_plr_turrell2018.py @@ -0,0 +1,107 @@ +import numpy as np +import pandas as pd +from sklearn.datasets import make_spd_matrix + +from doubleml.data import DoubleMLData +from doubleml.utils._aliases import _get_array_alias, _get_data_frame_alias, _get_dml_data_alias + +_array_alias = _get_array_alias() +_data_frame_alias = _get_data_frame_alias() +_dml_data_alias = _get_dml_data_alias() + + +def _g(x): + return np.power(np.sin(x), 2) + + +def _m(x, nu=0.0, gamma=1.0): + return 0.5 / np.pi * (np.sinh(gamma)) / (np.cosh(gamma) - np.cos(x - nu)) + + +def make_plr_turrell2018(n_obs=100, dim_x=20, theta=0.5, return_type="DoubleMLData", **kwargs): + """ + Generates data from a partially linear regression model used in a blog article by Turrell (2018). + The data generating process is defined as + + .. math:: + + d_i &= m_0(x_i' b) + v_i, & &v_i \\sim \\mathcal{N}(0,1), + + y_i &= \\theta d_i + g_0(x_i' b) + u_i, & &u_i \\sim \\mathcal{N}(0,1), + + + with covariates :math:`x_i \\sim \\mathcal{N}(0, \\Sigma)`, where :math:`\\Sigma` is a random symmetric, + positive-definite matrix generated with :py:meth:`sklearn.datasets.make_spd_matrix`. + :math:`b` is a vector with entries :math:`b_j=\\frac{1}{j}` and the nuisance functions are given by + + .. math:: + + m_0(x_i) &= \\frac{1}{2 \\pi} \\frac{\\sinh(\\gamma)}{\\cosh(\\gamma) - \\cos(x_i-\\nu)}, + + g_0(x_i) &= \\sin(x_i)^2. + + Parameters + ---------- + n_obs : + The number of observations to simulate. + dim_x : + The number of covariates. + theta : + The value of the causal parameter. + return_type : + If ``'DoubleMLData'`` or ``DoubleMLData``, returns a ``DoubleMLData`` object. + + If ``'DataFrame'``, ``'pd.DataFrame'`` or ``pd.DataFrame``, returns a ``pd.DataFrame``. + + If ``'array'``, ``'np.ndarray'``, ``'np.array'`` or ``np.ndarray``, returns ``np.ndarray``'s ``(x, y, d)``. + **kwargs + Additional keyword arguments to set non-default values for the parameters + :math:`\\nu=0`, or :math:`\\gamma=1`. + + References + ---------- + Turrell, A. 
(2018), Econometrics in Python part I - Double machine learning, Markov Wanderer: A blog on economics, + science, coding and data. `https://aeturrell.com/blog/posts/econometrics-in-python-parti-ml/ + `_. + """ + nu = kwargs.get("nu", 0.0) + gamma = kwargs.get("gamma", 1.0) + + b = [1 / k for k in range(1, dim_x + 1)] + sigma = make_spd_matrix(dim_x) + + x = np.random.multivariate_normal( + np.zeros(dim_x), + sigma, + size=[ + n_obs, + ], + ) + G = _g(np.dot(x, b)) + M = _m(np.dot(x, b), nu=nu, gamma=gamma) + d = M + np.random.standard_normal( + size=[ + n_obs, + ] + ) + y = ( + np.dot(theta, d) + + G + + np.random.standard_normal( + size=[ + n_obs, + ] + ) + ) + + if return_type in _array_alias: + return x, y, d + elif return_type in _data_frame_alias + _dml_data_alias: + x_cols = [f"X{i + 1}" for i in np.arange(dim_x)] + data = pd.DataFrame(np.column_stack((x, y, d)), columns=x_cols + ["y", "d"]) + if return_type in _data_frame_alias: + return data + else: + return DoubleMLData(data, "y", "d", x_cols) + else: + raise ValueError("Invalid return_type.") From 56d832c372fb2637632dc1711455a1239574c9e0 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Wed, 4 Jun 2025 14:41:02 +0200 Subject: [PATCH 09/84] update tests acc. to Refactor Data Generators #306 --- doubleml/plm/tests/conftest.py | 2 +- .../plm/tests/test_pliv_external_predictions.py | 2 +- doubleml/plm/tests/test_plr_external_predictions.py | 2 +- doubleml/tests/conftest.py | 2 +- doubleml/tests/test_datasets.py | 13 +++++++------ doubleml/tests/test_evaluate_learner.py | 2 +- doubleml/tests/test_exceptions.py | 9 ++------- doubleml/tests/test_exceptions_ext_preds.py | 2 +- doubleml/tests/test_framework.py | 2 +- doubleml/tests/test_model_defaults.py | 9 ++------- doubleml/tests/test_multiway_cluster.py | 2 +- doubleml/tests/test_nonlinear_cluster.py | 3 ++- doubleml/tests/test_return_types.py | 10 ++-------- doubleml/tests/test_scores.py | 3 ++- doubleml/tests/test_sensitivity.py | 2 +- doubleml/tests/test_sensitivity_cluster.py | 2 +- doubleml/tests/test_set_ml_nuisance_params.py | 3 ++- doubleml/tests/test_set_sample_splitting.py | 2 +- 18 files changed, 30 insertions(+), 42 deletions(-) diff --git a/doubleml/plm/tests/conftest.py b/doubleml/plm/tests/conftest.py index 497d6fc9..cfde0f41 100644 --- a/doubleml/plm/tests/conftest.py +++ b/doubleml/plm/tests/conftest.py @@ -4,7 +4,7 @@ from scipy.linalg import toeplitz from sklearn.datasets import make_spd_matrix -from doubleml.datasets import make_pliv_CHS2015, make_plr_turrell2018 +from doubleml.plm.datasets import make_pliv_CHS2015, make_plr_turrell2018 def _g(x): diff --git a/doubleml/plm/tests/test_pliv_external_predictions.py b/doubleml/plm/tests/test_pliv_external_predictions.py index bc8a1e8a..55c362ab 100644 --- a/doubleml/plm/tests/test_pliv_external_predictions.py +++ b/doubleml/plm/tests/test_pliv_external_predictions.py @@ -5,7 +5,7 @@ from sklearn.linear_model import LinearRegression from doubleml import DoubleMLData, DoubleMLPLIV -from doubleml.datasets import make_pliv_CHS2015 +from doubleml.plm.datasets import make_pliv_CHS2015 from doubleml.utils import DMLDummyRegressor diff --git a/doubleml/plm/tests/test_plr_external_predictions.py b/doubleml/plm/tests/test_plr_external_predictions.py index 47644555..160052b1 100644 --- a/doubleml/plm/tests/test_plr_external_predictions.py +++ b/doubleml/plm/tests/test_plr_external_predictions.py @@ -5,7 +5,7 @@ from sklearn.linear_model import LinearRegression from doubleml import DoubleMLData, DoubleMLPLR -from 
doubleml.datasets import make_plr_CCDDHNR2018 +from doubleml.plm.datasets import make_plr_CCDDHNR2018 from doubleml.utils import DMLDummyRegressor diff --git a/doubleml/tests/conftest.py b/doubleml/tests/conftest.py index bf53d788..6abea18c 100644 --- a/doubleml/tests/conftest.py +++ b/doubleml/tests/conftest.py @@ -4,7 +4,7 @@ from sklearn.datasets import make_classification, make_regression, make_spd_matrix from doubleml import DoubleMLData -from doubleml.datasets import make_pliv_CHS2015, make_plr_turrell2018 +from doubleml.plm.datasets import make_pliv_CHS2015, make_plr_turrell2018 def _g(x): diff --git a/doubleml/tests/test_datasets.py b/doubleml/tests/test_datasets.py index 67f612e8..8f1c4f03 100644 --- a/doubleml/tests/test_datasets.py +++ b/doubleml/tests/test_datasets.py @@ -3,21 +3,22 @@ import pytest from doubleml import DoubleMLClusterData, DoubleMLData -from doubleml.datasets import ( - _make_pliv_data, - fetch_401K, - fetch_bonus, +from doubleml.datasets import fetch_401K, fetch_bonus +from doubleml.irm.datasets import ( make_confounded_irm_data, - make_confounded_plr_data, make_heterogeneous_data, make_iivm_data, make_irm_data, make_irm_data_discrete_treatments, + make_ssm_data, +) +from doubleml.plm.datasets import ( + _make_pliv_data, + make_confounded_plr_data, make_pliv_CHS2015, make_pliv_multiway_cluster_CKMS2021, make_plr_CCDDHNR2018, make_plr_turrell2018, - make_ssm_data, ) msg_inv_return_type = "Invalid return_type." diff --git a/doubleml/tests/test_evaluate_learner.py b/doubleml/tests/test_evaluate_learner.py index dbad9b62..2c5d3f9a 100644 --- a/doubleml/tests/test_evaluate_learner.py +++ b/doubleml/tests/test_evaluate_learner.py @@ -5,7 +5,7 @@ from sklearn.linear_model import LinearRegression, LogisticRegression import doubleml as dml -from doubleml.datasets import make_irm_data +from doubleml.irm.datasets import make_irm_data from doubleml.utils._estimation import _logloss np.random.seed(3141) diff --git a/doubleml/tests/test_exceptions.py b/doubleml/tests/test_exceptions.py index a4655bb9..d8fe4e7c 100644 --- a/doubleml/tests/test_exceptions.py +++ b/doubleml/tests/test_exceptions.py @@ -21,13 +21,8 @@ DoubleMLPQ, DoubleMLQTE, ) -from doubleml.datasets import ( - make_iivm_data, - make_irm_data, - make_pliv_CHS2015, - make_pliv_multiway_cluster_CKMS2021, - make_plr_CCDDHNR2018, -) +from doubleml.irm.datasets import make_iivm_data, make_irm_data +from doubleml.plm.datasets import make_pliv_CHS2015, make_pliv_multiway_cluster_CKMS2021, make_plr_CCDDHNR2018 from doubleml.did.datasets import make_did_SZ2020 from ._utils import DummyDataClass diff --git a/doubleml/tests/test_exceptions_ext_preds.py b/doubleml/tests/test_exceptions_ext_preds.py index 3f600282..a65b6ebb 100644 --- a/doubleml/tests/test_exceptions_ext_preds.py +++ b/doubleml/tests/test_exceptions_ext_preds.py @@ -2,7 +2,7 @@ from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor from doubleml import DoubleMLCVAR, DoubleMLData, DoubleMLIRM, DoubleMLQTE -from doubleml.datasets import make_irm_data +from doubleml.irm.datasets import make_irm_data from doubleml.utils import DMLDummyClassifier, DMLDummyRegressor df_irm = make_irm_data(n_obs=10, dim_x=2, theta=0.5, return_type="DataFrame") diff --git a/doubleml/tests/test_framework.py b/doubleml/tests/test_framework.py index 24810b68..44dabb71 100644 --- a/doubleml/tests/test_framework.py +++ b/doubleml/tests/test_framework.py @@ -3,7 +3,7 @@ import pytest from sklearn.linear_model import LinearRegression, LogisticRegression -from 
doubleml.datasets import make_irm_data +from doubleml.irm.datasets import make_irm_data from doubleml.double_ml_framework import DoubleMLFramework, concat from doubleml.irm.irm import DoubleMLIRM diff --git a/doubleml/tests/test_model_defaults.py b/doubleml/tests/test_model_defaults.py index f55a555c..8417468a 100644 --- a/doubleml/tests/test_model_defaults.py +++ b/doubleml/tests/test_model_defaults.py @@ -4,13 +4,8 @@ from sklearn.linear_model import Lasso, LogisticRegression import doubleml as dml -from doubleml.datasets import ( - make_iivm_data, - make_irm_data, - make_pliv_CHS2015, - make_plr_CCDDHNR2018, - make_ssm_data, -) +from doubleml.irm.datasets import make_iivm_data, make_irm_data, make_ssm_data +from doubleml.plm.datasets import make_pliv_CHS2015, make_plr_CCDDHNR2018 from doubleml.did.datasets import make_did_SZ2020 np.random.seed(3141) diff --git a/doubleml/tests/test_multiway_cluster.py b/doubleml/tests/test_multiway_cluster.py index b064024f..10e5d445 100644 --- a/doubleml/tests/test_multiway_cluster.py +++ b/doubleml/tests/test_multiway_cluster.py @@ -6,7 +6,7 @@ from sklearn.linear_model import Lasso, LinearRegression import doubleml as dml -from doubleml.datasets import make_pliv_multiway_cluster_CKMS2021 +from doubleml.plm.datasets import make_pliv_multiway_cluster_CKMS2021 from ..plm.tests._utils_pliv_manual import compute_pliv_residuals, fit_pliv from ._utils import _clone diff --git a/doubleml/tests/test_nonlinear_cluster.py b/doubleml/tests/test_nonlinear_cluster.py index f84f3e2e..71998941 100644 --- a/doubleml/tests/test_nonlinear_cluster.py +++ b/doubleml/tests/test_nonlinear_cluster.py @@ -7,7 +7,8 @@ from sklearn.linear_model import Lasso, LinearRegression import doubleml as dml -from doubleml.datasets import DoubleMLClusterData, make_pliv_multiway_cluster_CKMS2021 +from doubleml import DoubleMLClusterData +from doubleml.plm.datasets import make_pliv_multiway_cluster_CKMS2021 from .test_nonlinear_score_mixin import DoubleMLPLRWithNonLinearScoreMixin diff --git a/doubleml/tests/test_return_types.py b/doubleml/tests/test_return_types.py index 11ebd624..03676b74 100644 --- a/doubleml/tests/test_return_types.py +++ b/doubleml/tests/test_return_types.py @@ -23,14 +23,8 @@ DoubleMLPQ, DoubleMLSSM, ) -from doubleml.datasets import ( - make_iivm_data, - make_irm_data, - make_pliv_CHS2015, - make_pliv_multiway_cluster_CKMS2021, - make_plr_CCDDHNR2018, - make_ssm_data, -) +from doubleml.irm.datasets import make_iivm_data, make_irm_data, make_ssm_data +from doubleml.plm.datasets import make_pliv_CHS2015, make_pliv_multiway_cluster_CKMS2021, make_plr_CCDDHNR2018 from doubleml.did.datasets import make_did_SZ2020 np.random.seed(3141) diff --git a/doubleml/tests/test_scores.py b/doubleml/tests/test_scores.py index c3281702..0687546d 100644 --- a/doubleml/tests/test_scores.py +++ b/doubleml/tests/test_scores.py @@ -3,7 +3,8 @@ from sklearn.linear_model import Lasso, LogisticRegression from doubleml import DoubleMLIIVM, DoubleMLIRM, DoubleMLPLIV, DoubleMLPLR -from doubleml.datasets import make_iivm_data, make_irm_data, make_pliv_CHS2015, make_plr_CCDDHNR2018 +from doubleml.irm.datasets import make_iivm_data, make_irm_data +from doubleml.plm.datasets import make_pliv_CHS2015, make_plr_CCDDHNR2018 np.random.seed(3141) dml_data_plr = make_plr_CCDDHNR2018(n_obs=100) diff --git a/doubleml/tests/test_sensitivity.py b/doubleml/tests/test_sensitivity.py index e4b43495..a0e47c0d 100644 --- a/doubleml/tests/test_sensitivity.py +++ b/doubleml/tests/test_sensitivity.py @@ -5,7 +5,7 @@ 
from sklearn.linear_model import LinearRegression, LogisticRegression import doubleml as dml -from doubleml.datasets import make_irm_data +from doubleml.irm.datasets import make_irm_data from ._utils_doubleml_sensitivity_manual import doubleml_sensitivity_benchmark_manual, doubleml_sensitivity_manual diff --git a/doubleml/tests/test_sensitivity_cluster.py b/doubleml/tests/test_sensitivity_cluster.py index 65ec0d64..83f8c270 100644 --- a/doubleml/tests/test_sensitivity_cluster.py +++ b/doubleml/tests/test_sensitivity_cluster.py @@ -5,7 +5,7 @@ from sklearn.linear_model import LinearRegression import doubleml as dml -from doubleml.datasets import make_pliv_multiway_cluster_CKMS2021 +from doubleml.plm.datasets import make_pliv_multiway_cluster_CKMS2021 from ._utils_doubleml_sensitivity_manual import doubleml_sensitivity_benchmark_manual diff --git a/doubleml/tests/test_set_ml_nuisance_params.py b/doubleml/tests/test_set_ml_nuisance_params.py index a189b184..055bcbff 100644 --- a/doubleml/tests/test_set_ml_nuisance_params.py +++ b/doubleml/tests/test_set_ml_nuisance_params.py @@ -3,7 +3,8 @@ from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor from doubleml import DoubleMLCVAR, DoubleMLIIVM, DoubleMLIRM, DoubleMLLPQ, DoubleMLPLIV, DoubleMLPLR, DoubleMLPQ -from doubleml.datasets import make_iivm_data, make_irm_data, make_pliv_CHS2015, make_plr_CCDDHNR2018 +from doubleml.irm.datasets import make_iivm_data, make_irm_data +from doubleml.plm.datasets import make_pliv_CHS2015, make_plr_CCDDHNR2018 # set default and test values n_est_default = 100 diff --git a/doubleml/tests/test_set_sample_splitting.py b/doubleml/tests/test_set_sample_splitting.py index 97313a00..0995d831 100644 --- a/doubleml/tests/test_set_sample_splitting.py +++ b/doubleml/tests/test_set_sample_splitting.py @@ -3,7 +3,7 @@ from sklearn.linear_model import Lasso from doubleml import DoubleMLPLR -from doubleml.datasets import make_plr_CCDDHNR2018 +from doubleml.plm.datasets import make_plr_CCDDHNR2018 np.random.seed(3141) dml_data = make_plr_CCDDHNR2018(n_obs=10) From 02adb2488ada05014dfcdf927c48cbd6e22b8758 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Wed, 4 Jun 2025 14:43:50 +0200 Subject: [PATCH 10/84] update docstrings acc. 
to Refactor Data Generators #306 --- doubleml/irm/iivm.py | 2 +- doubleml/irm/irm.py | 2 +- doubleml/plm/pliv.py | 2 +- doubleml/plm/plr.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/doubleml/irm/iivm.py b/doubleml/irm/iivm.py index a43c0a03..70c09cde 100644 --- a/doubleml/irm/iivm.py +++ b/doubleml/irm/iivm.py @@ -80,7 +80,7 @@ class DoubleMLIIVM(LinearScoreMixin, DoubleML): -------- >>> import numpy as np >>> import doubleml as dml - >>> from doubleml.datasets import make_iivm_data + >>> from doubleml.irm.datasets import make_iivm_data >>> from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier >>> np.random.seed(3141) >>> ml_g = RandomForestRegressor(n_estimators=100, max_features=20, max_depth=5, min_samples_leaf=2) diff --git a/doubleml/irm/irm.py b/doubleml/irm/irm.py index 9bf5ed35..10f6377c 100644 --- a/doubleml/irm/irm.py +++ b/doubleml/irm/irm.py @@ -84,7 +84,7 @@ class DoubleMLIRM(LinearScoreMixin, DoubleML): -------- >>> import numpy as np >>> import doubleml as dml - >>> from doubleml.datasets import make_irm_data + >>> from doubleml.irm.datasets import make_irm_data >>> from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier >>> np.random.seed(3141) >>> ml_g = RandomForestRegressor(n_estimators=100, max_features=20, max_depth=5, min_samples_leaf=2) diff --git a/doubleml/plm/pliv.py b/doubleml/plm/pliv.py index ba022688..52cb796d 100644 --- a/doubleml/plm/pliv.py +++ b/doubleml/plm/pliv.py @@ -62,7 +62,7 @@ class DoubleMLPLIV(LinearScoreMixin, DoubleML): -------- >>> import numpy as np >>> import doubleml as dml - >>> from doubleml.datasets import make_pliv_CHS2015 + >>> from doubleml.plm.datasets import make_pliv_CHS2015 >>> from sklearn.ensemble import RandomForestRegressor >>> from sklearn.base import clone >>> np.random.seed(3141) diff --git a/doubleml/plm/plr.py b/doubleml/plm/plr.py index a81bac48..4a57dfcb 100644 --- a/doubleml/plm/plr.py +++ b/doubleml/plm/plr.py @@ -60,7 +60,7 @@ class DoubleMLPLR(LinearScoreMixin, DoubleML): -------- >>> import numpy as np >>> import doubleml as dml - >>> from doubleml.datasets import make_plr_CCDDHNR2018 + >>> from doubleml.plm.datasets import make_plr_CCDDHNR2018 >>> from sklearn.ensemble import RandomForestRegressor >>> from sklearn.base import clone >>> np.random.seed(3141) From 39d4e7ea5098c02b64d44ed3edc08df76aa485fc Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Wed, 4 Jun 2025 14:44:52 +0200 Subject: [PATCH 11/84] update docstrings acc. 
to Refactor Data Generators #306 --- doubleml/irm/apos.py | 2 +- doubleml/irm/cvar.py | 2 +- doubleml/irm/lpq.py | 2 +- doubleml/irm/pq.py | 2 +- doubleml/irm/qte.py | 4 ++-- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/doubleml/irm/apos.py b/doubleml/irm/apos.py index 8099342a..2960e90d 100644 --- a/doubleml/irm/apos.py +++ b/doubleml/irm/apos.py @@ -673,7 +673,7 @@ def set_sample_splitting(self, all_smpls, all_smpls_cluster=None): -------- >>> import numpy as np >>> import doubleml as dml - >>> from doubleml.datasets import make_plr_CCDDHNR2018 + >>> from doubleml.plm.datasets import make_plr_CCDDHNR2018 >>> from sklearn.ensemble import RandomForestRegressor >>> from sklearn.base import clone >>> np.random.seed(3141) diff --git a/doubleml/irm/cvar.py b/doubleml/irm/cvar.py index d2aeaced..57347dce 100644 --- a/doubleml/irm/cvar.py +++ b/doubleml/irm/cvar.py @@ -82,7 +82,7 @@ class DoubleMLCVAR(LinearScoreMixin, DoubleML): -------- >>> import numpy as np >>> import doubleml as dml - >>> from doubleml.datasets import make_irm_data + >>> from doubleml.irm.datasets import make_irm_data >>> from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor >>> np.random.seed(3141) >>> ml_g = RandomForestRegressor(n_estimators=100, max_features=20, max_depth=10, min_samples_leaf=2) diff --git a/doubleml/irm/lpq.py b/doubleml/irm/lpq.py index c98e8fa2..f46fb38c 100644 --- a/doubleml/irm/lpq.py +++ b/doubleml/irm/lpq.py @@ -83,7 +83,7 @@ class DoubleMLLPQ(NonLinearScoreMixin, DoubleML): -------- >>> import numpy as np >>> import doubleml as dml - >>> from doubleml.datasets import make_iivm_data + >>> from doubleml.irm.datasets import make_iivm_data >>> from sklearn.ensemble import RandomForestClassifier >>> np.random.seed(3141) >>> ml_g = RandomForestClassifier(n_estimators=100, max_features=20, max_depth=10, min_samples_leaf=2) diff --git a/doubleml/irm/pq.py b/doubleml/irm/pq.py index f64dc471..d0425845 100644 --- a/doubleml/irm/pq.py +++ b/doubleml/irm/pq.py @@ -90,7 +90,7 @@ class DoubleMLPQ(NonLinearScoreMixin, DoubleML): -------- >>> import numpy as np >>> import doubleml as dml - >>> from doubleml.datasets import make_irm_data + >>> from doubleml.irm.datasets import make_irm_data >>> from sklearn.ensemble import RandomForestClassifier >>> np.random.seed(3141) >>> ml_g = RandomForestClassifier(n_estimators=100, max_features=20, max_depth=10, min_samples_leaf=2) diff --git a/doubleml/irm/qte.py b/doubleml/irm/qte.py index 68b91a9a..a2c803a3 100644 --- a/doubleml/irm/qte.py +++ b/doubleml/irm/qte.py @@ -72,7 +72,7 @@ class DoubleMLQTE: -------- >>> import numpy as np >>> import doubleml as dml - >>> from doubleml.datasets import make_irm_data + >>> from doubleml.irm.datasets import make_irm_data >>> from sklearn.ensemble import RandomForestClassifier >>> np.random.seed(3141) >>> ml_g = RandomForestClassifier(n_estimators=100, max_features=20, max_depth=10, min_samples_leaf=2) @@ -499,7 +499,7 @@ def set_sample_splitting(self, all_smpls, all_smpls_cluster=None): -------- >>> import numpy as np >>> import doubleml as dml - >>> from doubleml.datasets import make_plr_CCDDHNR2018 + >>> from doubleml.plm.datasets import make_plr_CCDDHNR2018 >>> from sklearn.ensemble import RandomForestRegressor >>> from sklearn.base import clone >>> np.random.seed(3141) From 83cfe9c88fe94cf172bdbcad1d67182f35957736 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Wed, 4 Jun 2025 14:45:56 +0200 Subject: [PATCH 12/84] update irm submod tests acc. 
to Refactor Data Generators #306 --- doubleml/irm/tests/conftest.py | 2 +- doubleml/irm/tests/test_apo.py | 2 +- doubleml/irm/tests/test_iivm_external_predictions.py | 2 +- doubleml/irm/tests/test_irm.py | 2 +- doubleml/irm/tests/test_irm_external_predictions.py | 2 +- doubleml/irm/tests/test_lpq_external_predictions.py | 2 +- doubleml/irm/tests/test_pq_external_predictions.py | 2 +- doubleml/irm/tests/test_qte.py | 2 +- doubleml/irm/tests/test_qte_exceptions.py | 2 +- doubleml/irm/tests/test_ssm_exceptions.py | 2 +- 10 files changed, 10 insertions(+), 10 deletions(-) diff --git a/doubleml/irm/tests/conftest.py b/doubleml/irm/tests/conftest.py index 1cf1d525..0a3d4db8 100644 --- a/doubleml/irm/tests/conftest.py +++ b/doubleml/irm/tests/conftest.py @@ -4,7 +4,7 @@ from scipy.linalg import toeplitz from sklearn.datasets import make_spd_matrix -from doubleml.datasets import make_iivm_data, make_irm_data +from doubleml.irm.datasets import make_iivm_data, make_irm_data def _g(x): diff --git a/doubleml/irm/tests/test_apo.py b/doubleml/irm/tests/test_apo.py index df4ec284..7558b7c1 100644 --- a/doubleml/irm/tests/test_apo.py +++ b/doubleml/irm/tests/test_apo.py @@ -8,7 +8,7 @@ from sklearn.linear_model import LinearRegression, LogisticRegression import doubleml as dml -from doubleml.datasets import make_irm_data, make_irm_data_discrete_treatments +from doubleml.irm.datasets import make_irm_data, make_irm_data_discrete_treatments from ...tests._utils import draw_smpls from ._utils_apo_manual import boot_apo, fit_apo, fit_sensitivity_elements_apo diff --git a/doubleml/irm/tests/test_iivm_external_predictions.py b/doubleml/irm/tests/test_iivm_external_predictions.py index 7f4626e9..d71d2bb5 100644 --- a/doubleml/irm/tests/test_iivm_external_predictions.py +++ b/doubleml/irm/tests/test_iivm_external_predictions.py @@ -5,7 +5,7 @@ from sklearn.linear_model import LinearRegression, LogisticRegression from doubleml import DoubleMLData, DoubleMLIIVM -from doubleml.datasets import make_iivm_data +from doubleml.irm.datasets import make_iivm_data from doubleml.utils import DMLDummyClassifier, DMLDummyRegressor diff --git a/doubleml/irm/tests/test_irm.py b/doubleml/irm/tests/test_irm.py index f99f2253..856c7f59 100644 --- a/doubleml/irm/tests/test_irm.py +++ b/doubleml/irm/tests/test_irm.py @@ -8,7 +8,7 @@ from sklearn.linear_model import LinearRegression, LogisticRegression import doubleml as dml -from doubleml.datasets import make_irm_data +from doubleml.irm.datasets import make_irm_data from doubleml.utils.resampling import DoubleMLResampling from ...tests._utils import draw_smpls diff --git a/doubleml/irm/tests/test_irm_external_predictions.py b/doubleml/irm/tests/test_irm_external_predictions.py index dabf6c0e..5d0412d5 100644 --- a/doubleml/irm/tests/test_irm_external_predictions.py +++ b/doubleml/irm/tests/test_irm_external_predictions.py @@ -5,7 +5,7 @@ from sklearn.linear_model import LinearRegression, LogisticRegression from doubleml import DoubleMLData, DoubleMLIRM -from doubleml.datasets import make_irm_data +from doubleml.irm.datasets import make_irm_data from doubleml.utils import DMLDummyClassifier, DMLDummyRegressor diff --git a/doubleml/irm/tests/test_lpq_external_predictions.py b/doubleml/irm/tests/test_lpq_external_predictions.py index 66f2ece6..48cb42f5 100644 --- a/doubleml/irm/tests/test_lpq_external_predictions.py +++ b/doubleml/irm/tests/test_lpq_external_predictions.py @@ -5,7 +5,7 @@ from sklearn.linear_model import LogisticRegression from doubleml import DoubleMLData, DoubleMLLPQ 
-from doubleml.datasets import make_iivm_data +from doubleml.irm.datasets import make_iivm_data from doubleml.utils import DMLDummyClassifier from ...tests._utils import draw_smpls diff --git a/doubleml/irm/tests/test_pq_external_predictions.py b/doubleml/irm/tests/test_pq_external_predictions.py index 28f8ec66..9674c464 100644 --- a/doubleml/irm/tests/test_pq_external_predictions.py +++ b/doubleml/irm/tests/test_pq_external_predictions.py @@ -5,7 +5,7 @@ from sklearn.linear_model import LogisticRegression from doubleml import DoubleMLData, DoubleMLPQ -from doubleml.datasets import make_irm_data +from doubleml.irm.datasets import make_irm_data from doubleml.utils import DMLDummyClassifier from ...tests._utils import draw_smpls diff --git a/doubleml/irm/tests/test_qte.py b/doubleml/irm/tests/test_qte.py index 0557c85b..7fcbeec2 100644 --- a/doubleml/irm/tests/test_qte.py +++ b/doubleml/irm/tests/test_qte.py @@ -8,7 +8,7 @@ from sklearn.linear_model import LogisticRegression import doubleml as dml -from doubleml.datasets import make_irm_data +from doubleml.irm.datasets import make_irm_data from ...tests._utils import confint_manual, draw_smpls from ...utils._estimation import _default_kde diff --git a/doubleml/irm/tests/test_qte_exceptions.py b/doubleml/irm/tests/test_qte_exceptions.py index 9f94f5d4..f4e95110 100644 --- a/doubleml/irm/tests/test_qte_exceptions.py +++ b/doubleml/irm/tests/test_qte_exceptions.py @@ -6,7 +6,7 @@ from doubleml import DoubleMLData, DoubleMLQTE from doubleml.data.base_data import DoubleMLBaseData -from doubleml.datasets import make_irm_data +from doubleml.irm.datasets import make_irm_data np.random.seed(42) n = 100 diff --git a/doubleml/irm/tests/test_ssm_exceptions.py b/doubleml/irm/tests/test_ssm_exceptions.py index 6ff276e3..50b082ec 100644 --- a/doubleml/irm/tests/test_ssm_exceptions.py +++ b/doubleml/irm/tests/test_ssm_exceptions.py @@ -6,7 +6,7 @@ from doubleml import DoubleMLSSM from doubleml.data.base_data import DoubleMLBaseData -from doubleml.datasets import make_ssm_data +from doubleml.irm.datasets import make_ssm_data np.random.seed(3141) n = 100 From 3ff0edbbae50149d70fdb2b1ccc7f8d8cda75bc9 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Wed, 4 Jun 2025 14:47:01 +0200 Subject: [PATCH 13/84] update irm submod tests acc. 
to Refactor Data Generators #306 --- doubleml/irm/tests/test_apo_exceptions.py | 2 +- doubleml/irm/tests/test_apo_external_predictions.py | 2 +- doubleml/irm/tests/test_apos.py | 2 +- doubleml/irm/tests/test_apos_classfier.py | 2 +- doubleml/irm/tests/test_apos_exceptions.py | 2 +- doubleml/irm/tests/test_apos_external_predictions.py | 2 +- doubleml/irm/tests/test_apos_weighted_scores.py | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/doubleml/irm/tests/test_apo_exceptions.py b/doubleml/irm/tests/test_apo_exceptions.py index cfb6e93b..e643efca 100644 --- a/doubleml/irm/tests/test_apo_exceptions.py +++ b/doubleml/irm/tests/test_apo_exceptions.py @@ -5,7 +5,7 @@ from sklearn.linear_model import Lasso, LogisticRegression from doubleml import DoubleMLAPO, DoubleMLData -from doubleml.datasets import make_iivm_data, make_irm_data, make_irm_data_discrete_treatments +from doubleml.irm.datasets import make_iivm_data, make_irm_data, make_irm_data_discrete_treatments n = 100 data_apo = make_irm_data_discrete_treatments(n_obs=n) diff --git a/doubleml/irm/tests/test_apo_external_predictions.py b/doubleml/irm/tests/test_apo_external_predictions.py index 2bbe50e8..246ef021 100644 --- a/doubleml/irm/tests/test_apo_external_predictions.py +++ b/doubleml/irm/tests/test_apo_external_predictions.py @@ -6,7 +6,7 @@ from sklearn.linear_model import LinearRegression, LogisticRegression from doubleml import DoubleMLAPO, DoubleMLData -from doubleml.datasets import make_irm_data_discrete_treatments +from doubleml.irm.datasets import make_irm_data_discrete_treatments from doubleml.utils import DMLDummyClassifier, DMLDummyRegressor from ...tests._utils import draw_smpls diff --git a/doubleml/irm/tests/test_apos.py b/doubleml/irm/tests/test_apos.py index 746cb63c..55a48ced 100644 --- a/doubleml/irm/tests/test_apos.py +++ b/doubleml/irm/tests/test_apos.py @@ -6,7 +6,7 @@ from sklearn.linear_model import LinearRegression, LogisticRegression import doubleml as dml -from doubleml.datasets import make_irm_data, make_irm_data_discrete_treatments +from doubleml.irm.datasets import make_irm_data, make_irm_data_discrete_treatments from ...tests._utils import confint_manual from ._utils_apos_manual import boot_apos, fit_apos diff --git a/doubleml/irm/tests/test_apos_classfier.py b/doubleml/irm/tests/test_apos_classfier.py index 06fdc308..f9cfc10c 100644 --- a/doubleml/irm/tests/test_apos_classfier.py +++ b/doubleml/irm/tests/test_apos_classfier.py @@ -6,7 +6,7 @@ from sklearn.linear_model import LogisticRegression import doubleml as dml -from doubleml.datasets import make_irm_data_discrete_treatments +from doubleml.irm.datasets import make_irm_data_discrete_treatments from ...tests._utils import confint_manual from ._utils_apos_manual import boot_apos, fit_apos diff --git a/doubleml/irm/tests/test_apos_exceptions.py b/doubleml/irm/tests/test_apos_exceptions.py index c309b7e2..f1c9b3d6 100644 --- a/doubleml/irm/tests/test_apos_exceptions.py +++ b/doubleml/irm/tests/test_apos_exceptions.py @@ -4,7 +4,7 @@ from sklearn.linear_model import Lasso, LogisticRegression from doubleml import DoubleMLAPOS, DoubleMLData -from doubleml.datasets import make_iivm_data, make_irm_data_discrete_treatments +from doubleml.irm.datasets import make_iivm_data, make_irm_data_discrete_treatments n = 100 data = make_irm_data_discrete_treatments(n_obs=n) diff --git a/doubleml/irm/tests/test_apos_external_predictions.py b/doubleml/irm/tests/test_apos_external_predictions.py index 9e97de07..ed4323ad 100644 --- 
a/doubleml/irm/tests/test_apos_external_predictions.py +++ b/doubleml/irm/tests/test_apos_external_predictions.py @@ -6,7 +6,7 @@ from sklearn.linear_model import LinearRegression, LogisticRegression from doubleml import DoubleMLAPOS, DoubleMLData -from doubleml.datasets import make_irm_data_discrete_treatments +from doubleml.irm.datasets import make_irm_data_discrete_treatments from doubleml.utils import DMLDummyClassifier, DMLDummyRegressor from ...tests._utils import draw_smpls diff --git a/doubleml/irm/tests/test_apos_weighted_scores.py b/doubleml/irm/tests/test_apos_weighted_scores.py index ea612dec..6d0a7f65 100644 --- a/doubleml/irm/tests/test_apos_weighted_scores.py +++ b/doubleml/irm/tests/test_apos_weighted_scores.py @@ -6,7 +6,7 @@ from sklearn.linear_model import LinearRegression, LogisticRegression import doubleml as dml -from doubleml.datasets import make_irm_data_discrete_treatments +from doubleml.irm.datasets import make_irm_data_discrete_treatments @pytest.fixture( From caa530e523ff49c07f1447d432868385bb48b685 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Wed, 4 Jun 2025 14:59:02 +0200 Subject: [PATCH 14/84] update irm submod tests acc. to Refactor Data Generators #306 --- doubleml/data/tests/conftest.py | 3 ++- doubleml/data/tests/test_cluster_data.py | 2 +- doubleml/data/tests/test_dml_data.py | 4 ++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/doubleml/data/tests/conftest.py b/doubleml/data/tests/conftest.py index 6960b58a..fcefabce 100644 --- a/doubleml/data/tests/conftest.py +++ b/doubleml/data/tests/conftest.py @@ -2,7 +2,8 @@ import pandas as pd import pytest -from doubleml.datasets import make_irm_data, make_plr_turrell2018 +from doubleml.irm.datasets import make_irm_data +from doubleml.plm.datasets import make_plr_turrell2018 @pytest.fixture(scope="session", params=[(500, 10), (1000, 20), (1000, 100)]) diff --git a/doubleml/data/tests/test_cluster_data.py b/doubleml/data/tests/test_cluster_data.py index e95dfa03..b02a3275 100644 --- a/doubleml/data/tests/test_cluster_data.py +++ b/doubleml/data/tests/test_cluster_data.py @@ -3,7 +3,7 @@ import pytest from doubleml import DoubleMLClusterData -from doubleml.datasets import make_pliv_multiway_cluster_CKMS2021, make_plr_CCDDHNR2018 +from doubleml.plm.datasets import make_pliv_multiway_cluster_CKMS2021, make_plr_CCDDHNR2018 @pytest.mark.ci diff --git a/doubleml/data/tests/test_dml_data.py b/doubleml/data/tests/test_dml_data.py index 7cf394b5..a2ada74b 100644 --- a/doubleml/data/tests/test_dml_data.py +++ b/doubleml/data/tests/test_dml_data.py @@ -5,12 +5,12 @@ from doubleml import DoubleMLData, DoubleMLDIDCS, DoubleMLPLR, DoubleMLSSM from doubleml.data.base_data import DoubleMLBaseData -from doubleml.datasets import ( +from doubleml.plm.datasets import ( _make_pliv_data, make_pliv_CHS2015, make_plr_CCDDHNR2018, - make_ssm_data, ) +from doubleml.irm.datasets import make_ssm_data from doubleml.did.datasets import make_did_SZ2020 From 4cb9148833d05ab409053e29f0890d7df79299c9 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Wed, 4 Jun 2025 14:59:07 +0200 Subject: [PATCH 15/84] update docstrings acc. 
to Refactor Data Generators #306 --- doubleml/double_ml.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index 764865a4..fe4cec5d 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -1167,10 +1167,9 @@ def evaluate_learners(self, learners=None, metric=_rmse): Examples -------- - >>> import numpy as np - >>> import doubleml as dml + >>> import numpy as np >>> import doubleml as dml >>> from sklearn.metrics import mean_absolute_error - >>> from doubleml.datasets import make_irm_data + >>> from doubleml.irm.datasets import make_irm_data >>> from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier >>> np.random.seed(3141) >>> ml_g = RandomForestRegressor(n_estimators=100, max_features=20, max_depth=5, min_samples_leaf=2) @@ -1284,10 +1283,9 @@ def set_sample_splitting(self, all_smpls, all_smpls_cluster=None): self : object Examples - -------- - >>> import numpy as np + -------- >>> import numpy as np >>> import doubleml as dml - >>> from doubleml.datasets import make_plr_CCDDHNR2018 + >>> from doubleml.plm.datasets import make_plr_CCDDHNR2018 >>> from sklearn.ensemble import RandomForestRegressor >>> from sklearn.base import clone >>> np.random.seed(3141) From 312f601408e70c48cdb6f71b2803cfe570f21169 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Wed, 4 Jun 2025 14:59:13 +0200 Subject: [PATCH 16/84] update docstrings acc. to Refactor Data Generators #306 --- doubleml/did/did_cs.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/doubleml/did/did_cs.py b/doubleml/did/did_cs.py index ab2af5b9..7f33210f 100644 --- a/doubleml/did/did_cs.py +++ b/doubleml/did/did_cs.py @@ -63,10 +63,9 @@ class DoubleMLDIDCS(LinearScoreMixin, DoubleML): Default is ``True``. Examples - -------- - >>> import numpy as np + -------- >>> import numpy as np >>> import doubleml as dml - >>> from doubleml.datasets import make_did_SZ2020 + >>> from doubleml.did.datasets import make_did_SZ2020 >>> from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier >>> np.random.seed(42) >>> ml_g = RandomForestRegressor(n_estimators=100, max_depth=5, min_samples_leaf=5) From 0d07790466aff6674e3d3d34163643ae26f025f8 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Wed, 4 Jun 2025 15:00:40 +0200 Subject: [PATCH 17/84] update docstrings acc. to Refactor Data Generators #306 --- doubleml/data/base_data.py | 10 ++++------ doubleml/data/cluster_data.py | 5 ++--- doubleml/did/did.py | 5 ++--- 3 files changed, 8 insertions(+), 12 deletions(-) diff --git a/doubleml/data/base_data.py b/doubleml/data/base_data.py index 318508e9..7a114220 100644 --- a/doubleml/data/base_data.py +++ b/doubleml/data/base_data.py @@ -135,9 +135,8 @@ class DoubleMLData(DoubleMLBaseData): Default is ``True``. Examples - -------- - >>> from doubleml import DoubleMLData - >>> from doubleml.datasets import make_plr_CCDDHNR2018 + -------- >>> from doubleml import DoubleMLData + >>> from doubleml.plm.datasets import make_plr_CCDDHNR2018 >>> # initialization from pandas.DataFrame >>> df = make_plr_CCDDHNR2018(return_type='DataFrame') >>> obj_dml_data_from_df = DoubleMLData(df, 'y', 'd') @@ -266,9 +265,8 @@ def from_arrays( Default is ``True``. 
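
Every docstring hunk in the patches above applies the same substitution: the dataset generators move out of the flat ``doubleml.datasets`` module into model-specific submodules (``doubleml.plm.datasets``, ``doubleml.irm.datasets``, ``doubleml.did.datasets``). A minimal end-to-end sketch with the new import path, mirroring the updated PLR docstring and assuming the refactored submodules introduced in this PR:

```python
import numpy as np
import doubleml as dml
from doubleml.plm.datasets import make_plr_CCDDHNR2018  # was: from doubleml.datasets import ...
from sklearn.base import clone
from sklearn.ensemble import RandomForestRegressor

np.random.seed(3141)
learner = RandomForestRegressor(n_estimators=100, max_features=20, max_depth=5, min_samples_leaf=2)
ml_l = clone(learner)  # learner for the nuisance function E[Y|X]
ml_m = clone(learner)  # learner for the nuisance function E[D|X]

# generate data with the relocated generator and fit a partially linear regression model
obj_dml_data = make_plr_CCDDHNR2018(alpha=0.5, n_obs=500, dim_x=20)
dml_plr_obj = dml.DoubleMLPLR(obj_dml_data, ml_l, ml_m)
print(dml_plr_obj.fit().summary)
```
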
Examples - -------- - >>> from doubleml import DoubleMLData - >>> from doubleml.datasets import make_plr_CCDDHNR2018 + -------- >>> from doubleml import DoubleMLData + >>> from doubleml.plm.datasets import make_plr_CCDDHNR2018 >>> (x, y, d) = make_plr_CCDDHNR2018(return_type='array') >>> obj_dml_data_from_array = DoubleMLData.from_arrays(x, y, d) """ diff --git a/doubleml/data/cluster_data.py b/doubleml/data/cluster_data.py index 658ab0cc..2cb9fb4f 100644 --- a/doubleml/data/cluster_data.py +++ b/doubleml/data/cluster_data.py @@ -61,9 +61,8 @@ class DoubleMLClusterData(DoubleMLData): Default is ``True``. Examples - -------- - >>> from doubleml import DoubleMLClusterData - >>> from doubleml.datasets import make_pliv_multiway_cluster_CKMS2021 + -------- >>> from doubleml import DoubleMLClusterData + >>> from doubleml.plm.datasets import make_pliv_multiway_cluster_CKMS2021 >>> # initialization from pandas.DataFrame >>> df = make_pliv_multiway_cluster_CKMS2021(return_type='DataFrame') >>> obj_dml_data_from_df = DoubleMLClusterData(df, 'Y', 'D', ['cluster_var_i', 'cluster_var_j'], z_cols='Z') diff --git a/doubleml/did/did.py b/doubleml/did/did.py index 7a671993..170535ea 100644 --- a/doubleml/did/did.py +++ b/doubleml/did/did.py @@ -63,10 +63,9 @@ class DoubleMLDID(LinearScoreMixin, DoubleML): Default is ``True``. Examples - -------- - >>> import numpy as np + -------- >>> import numpy as np >>> import doubleml as dml - >>> from doubleml.datasets import make_did_SZ2020 + >>> from doubleml.did.datasets import make_did_SZ2020 >>> from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier >>> np.random.seed(42) >>> ml_g = RandomForestRegressor(n_estimators=100, max_depth=5, min_samples_leaf=5) From 8b4f4bcd63876abecbce07bce21aa300ca4b9790 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Wed, 4 Jun 2025 15:15:51 +0200 Subject: [PATCH 18/84] update documentations acc. to Refactor Data Generators #306 --- .github/ISSUE_TEMPLATE/bug_report.yml | 6 ++---- CONTRIBUTING.md | 2 +- doubleml/data/cluster_data.py | 5 ++--- 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index baa6d625..3e5321ea 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -23,12 +23,10 @@ body: attributes: label: Minimum reproducible code snippet description: | - Please provide a short reproducible code snippet. Example: - - ```python + Please provide a short reproducible code snippet. Example: ```python import numpy as np import doubleml as dml - from doubleml.datasets import make_plr_CCDDHNR2018 + from doubleml.plm.datasets import make_plr_CCDDHNR2018 from sklearn.ensemble import RandomForestRegressor from sklearn.base import clone np.random.seed(3141) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 4809c62a..a614dd73 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -15,7 +15,7 @@ To submit a **bug report**, you can use our ```python import numpy as np import doubleml as dml -from doubleml.datasets import make_plr_CCDDHNR2018 +from doubleml.plm.datasets import make_plr_CCDDHNR2018 from sklearn.ensemble import RandomForestRegressor from sklearn.base import clone np.random.seed(3141) diff --git a/doubleml/data/cluster_data.py b/doubleml/data/cluster_data.py index 2cb9fb4f..89947b73 100644 --- a/doubleml/data/cluster_data.py +++ b/doubleml/data/cluster_data.py @@ -171,9 +171,8 @@ def from_arrays( Default is ``True``. 
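
The cluster data-backend keeps the same pattern; only the generator import moves. A brief, illustrative sketch (the ``DoubleMLPLIV`` call and the generator defaults are standard usage and not part of this diff):

```python
import numpy as np
import doubleml as dml
from doubleml.plm.datasets import make_pliv_multiway_cluster_CKMS2021  # new import path
from sklearn.base import clone
from sklearn.ensemble import RandomForestRegressor

np.random.seed(3141)
learner = RandomForestRegressor(n_estimators=100, max_depth=5)
ml_l, ml_m, ml_r = clone(learner), clone(learner), clone(learner)

# the generator returns a cluster data object (two-way clustering) by default
obj_dml_cluster_data = make_pliv_multiway_cluster_CKMS2021()
dml_pliv_obj = dml.DoubleMLPLIV(obj_dml_cluster_data, ml_l, ml_m, ml_r)
print(dml_pliv_obj.fit().summary)
```
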
Examples - -------- - >>> from doubleml import DoubleMLClusterData - >>> from doubleml.datasets import make_pliv_multiway_cluster_CKMS2021 + -------- >>> from doubleml import DoubleMLClusterData + >>> from doubleml.plm.datasets import make_pliv_multiway_cluster_CKMS2021 >>> (x, y, d, cluster_vars, z) = make_pliv_multiway_cluster_CKMS2021(return_type='array') >>> obj_dml_data_from_array = DoubleMLClusterData.from_arrays(x, y, d, cluster_vars, z) """ From 5c443952cfe497bfebea0135e4af1e2345148b9f Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Wed, 4 Jun 2025 15:39:43 +0200 Subject: [PATCH 19/84] update tests acc. to Refactor Data Generators #306 --- doubleml/plm/tests/test_plr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doubleml/plm/tests/test_plr.py b/doubleml/plm/tests/test_plr.py index 79f21f84..65f5ad83 100644 --- a/doubleml/plm/tests/test_plr.py +++ b/doubleml/plm/tests/test_plr.py @@ -304,7 +304,7 @@ def test_dml_plr_cate_gate(score, cov_type): # collect data np.random.seed(42) - obj_dml_data = dml.datasets.make_plr_CCDDHNR2018(n_obs=n) + obj_dml_data = dml.plm.datasets.make_plr_CCDDHNR2018(n_obs=n) ml_l = LinearRegression() ml_g = LinearRegression() ml_m = LinearRegression() From a9f428474950aa1703f52138b9d298ac62989dfa Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Wed, 4 Jun 2025 22:59:07 +0200 Subject: [PATCH 20/84] upd --- doubleml/data/__init__.py | 8 +- doubleml/data/base_data.py | 290 ++++++++---------- doubleml/data/base_data_content.txt | Bin 0 -> 60862 bytes doubleml/data/cluster_data.py | 8 +- doubleml/data/did_data.py | 272 ++++++++++++++++ doubleml/data/panel_data.py | 50 ++- doubleml/data/rdd_data.py | 272 ++++++++++++++++ doubleml/data/ssm_data.py | 274 +++++++++++++++++ doubleml/data/tests/test_cluster_data.py | 139 ++++----- .../dgp_pliv_multiway_cluster_CKMS2021.py | 8 +- doubleml/tests/test_multiway_cluster.py | 9 +- doubleml/tests/test_nonlinear_cluster.py | 13 +- doubleml/tests/test_sensitivity_cluster.py | 4 +- doubleml/utils/_aliases.py | 5 +- 14 files changed, 1077 insertions(+), 275 deletions(-) create mode 100644 doubleml/data/base_data_content.txt create mode 100644 doubleml/data/did_data.py create mode 100644 doubleml/data/rdd_data.py create mode 100644 doubleml/data/ssm_data.py diff --git a/doubleml/data/__init__.py b/doubleml/data/__init__.py index d8a920c6..dfe673e7 100644 --- a/doubleml/data/__init__.py +++ b/doubleml/data/__init__.py @@ -3,11 +3,15 @@ """ from .base_data import DoubleMLData -from .cluster_data import DoubleMLClusterData +from .did_data import DoubleMLDIDData from .panel_data import DoubleMLPanelData +from .rdd_data import DoubleMLRDDData +from .ssm_data import DoubleMLSSMData __all__ = [ "DoubleMLData", - "DoubleMLClusterData", + "DoubleMLDIDData", "DoubleMLPanelData", + "DoubleMLRDDData", + "DoubleMLSSMData", ] diff --git a/doubleml/data/base_data.py b/doubleml/data/base_data.py index 7a114220..8d585633 100644 --- a/doubleml/data/base_data.py +++ b/doubleml/data/base_data.py @@ -4,14 +4,15 @@ import numpy as np import pandas as pd from sklearn.utils import assert_all_finite -from sklearn.utils.multiclass import type_of_target from sklearn.utils.validation import check_array, check_consistent_length, column_or_1d from doubleml.utils._estimation import _assure_2d_array class DoubleMLBaseData(ABC): - """Base Class Double machine learning data-backends""" + """Bas x_cols = [f"X{i + 1}" for i in np.arange(x.shape[1])] + # baseline version with features, outcome and treatments + data = 
pd.DataFrame(np.column_stack((x, y, d)), columns=x_cols + [y_col] + d_cols)Class Double machine learning data-backends""" def __init__(self, data): if not isinstance(data, pd.DataFrame): @@ -98,24 +99,23 @@ class DoubleMLData(DoubleMLBaseData): x_cols : None, str or list The covariates. If ``None``, all variables (columns of ``data``) which are neither specified as outcome variable ``y_col``, nor - treatment variables ``d_cols``, nor instrumental variables ``z_cols`` are used as covariates. + treatment variables ``d_cols``, nor instrumental variables ``z_cols``, nor cluster variables ``cluster_cols`` + are used as covariates. Default is ``None``. z_cols : None, str or list The instrumental variable(s). + Default is ``None``. cluster_cols : None, str or list + The cluster variable(s). Default is ``None``. - - t_col : None or str - The time variable (only relevant/used for DiD Estimators). - Default is ``None``. - - s_col : None or str - The score or selection variable (only relevant/used for RDD or SSM Estimatiors). - Default is ``None``. - + use_other_treat_as_covariate : bool Indicates whether in the multiple-treatment case the other treatment variables should be added as covariates. Default is ``True``. + + is_cluster_data : bool + Flag indicating whether this data object is being used for cluster data. + Default is ``False``. force_all_x_finite : bool or str Indicates whether to raise an error on infinite values and / or missings in the covariates ``x``. @@ -135,7 +135,8 @@ class DoubleMLData(DoubleMLBaseData): Default is ``True``. Examples - -------- >>> from doubleml import DoubleMLData + -------- + >>> from doubleml import DoubleMLData >>> from doubleml.plm.datasets import make_plr_CCDDHNR2018 >>> # initialization from pandas.DataFrame >>> df = make_plr_CCDDHNR2018(return_type='DataFrame') @@ -152,27 +153,29 @@ def __init__( d_cols, x_cols=None, z_cols=None, - t_col=None, - s_col=None, + cluster_cols=None, use_other_treat_as_covariate=True, force_all_x_finite=True, force_all_d_finite=True, + is_cluster_data=False, ): DoubleMLBaseData.__init__(self, data) self.y_col = y_col self.d_cols = d_cols self.z_cols = z_cols - self.t_col = t_col - self.s_col = s_col + self.cluster_cols = cluster_cols self.x_cols = x_cols + self.is_cluster_data = is_cluster_data self._check_disjoint_sets() self.use_other_treat_as_covariate = use_other_treat_as_covariate self.force_all_x_finite = force_all_x_finite self.force_all_d_finite = force_all_d_finite self._binary_treats = self._check_binary_treats() self._binary_outcome = self._check_binary_outcome() - self._set_y_z_t_s() + self._set_y_z() + if self.cluster_cols is not None: + self._set_cluster_vars() # by default, we initialize to the first treatment variable self.set_x_d(self.d_cols[0]) @@ -188,7 +191,7 @@ def __str__(self): + "\n------------------ DataFrame info ------------------\n" + df_info ) - return res + return res def _data_summary_str(self): data_summary = ( @@ -197,10 +200,12 @@ def _data_summary_str(self): f"Covariates: {self.x_cols}\n" f"Instrument variable(s): {self.z_cols}\n" ) - if self.t_col is not None: - data_summary += f"Time variable: {self.t_col}\n" - if self.s_col is not None: - data_summary += f"Score/Selection variable: {self.s_col}\n" + + if self.cluster_cols is not None: + data_summary += f"Cluster variable(s): {self.cluster_cols}\n" + + if hasattr(self, 'is_cluster_data') and self.is_cluster_data: + data_summary += f"Is cluster data: {self.is_cluster_data}\n" data_summary += f"No. 
Observations: {self.n_obs}\n" return data_summary @@ -211,11 +216,11 @@ def from_arrays( y, d, z=None, - t=None, - s=None, + cluster_vars=None, use_other_treat_as_covariate=True, force_all_x_finite=True, force_all_d_finite=True, + is_cluster_data=False, ): """ Initialize :class:`DoubleMLData` from :class:`numpy.ndarray`'s. @@ -229,18 +234,12 @@ def from_arrays( Array of the outcome variable. d : :class:`numpy.ndarray` - Array of treatment variables. - - z : None or :class:`numpy.ndarray` + Array of treatment variables. z : None or :class:`numpy.ndarray` Array of instrumental variables. Default is ``None``. - t : :class:`numpy.ndarray` - Array of the time variable (only relevant/used for DiD models). - Default is ``None``. - - s : :class:`numpy.ndarray` - Array of the score or selection variable (only relevant/used for RDD and SSM models). + cluster_vars : None or :class:`numpy.ndarray` + Array of cluster variables. Default is ``None``. use_other_treat_as_covariate : bool @@ -300,6 +299,7 @@ def from_arrays( d = _assure_2d_array(d) y_col = "y" + if z is None: check_consistent_length(x, y, d) z_cols = None @@ -312,39 +312,30 @@ def from_arrays( else: z_cols = [f"z{i + 1}" for i in np.arange(z.shape[1])] - if t is None: - t_col = None - else: - t = column_or_1d(t, warn=True) - check_consistent_length(x, y, d, t) - t_col = "t" - - if s is None: - s_col = None + if cluster_vars is None: + cluster_cols = None else: - s = column_or_1d(s, warn=True) - check_consistent_length(x, y, d, s) - s_col = "s" + cluster_vars = check_array(cluster_vars, ensure_2d=False, allow_nd=False) + cluster_vars = _assure_2d_array(cluster_vars) + check_consistent_length(x, y, d, cluster_vars) + if cluster_vars.shape[1] == 1: + cluster_cols = ["cluster_var"] + else: + cluster_cols = [f"cluster_var{i + 1}" for i in np.arange(cluster_vars.shape[1])] if d.shape[1] == 1: d_cols = ["d"] else: d_cols = [f"d{i + 1}" for i in np.arange(d.shape[1])] - x_cols = [f"X{i + 1}" for i in np.arange(x.shape[1])] - - # baseline version with features, outcome and treatments + x_cols = [f"X{i + 1}" for i in np.arange(x.shape[1])] # baseline version with features, outcome and treatments data = pd.DataFrame(np.column_stack((x, y, d)), columns=x_cols + [y_col] + d_cols) - if z is not None: df_z = pd.DataFrame(z, columns=z_cols) data = pd.concat([data, df_z], axis=1) - - if t is not None: - data[t_col] = t - - if s is not None: - data[s_col] = s + if cluster_vars is not None: + df_cluster = pd.DataFrame(cluster_vars, columns=cluster_cols) + data = pd.concat([data, df_cluster], axis=1) return cls( data, @@ -352,11 +343,11 @@ def from_arrays( d_cols, x_cols, z_cols, - t_col, - s_col, + cluster_cols, use_other_treat_as_covariate, force_all_x_finite, force_all_d_finite, + is_cluster_data, ) @property @@ -397,24 +388,35 @@ def z(self): return None @property - def t(self): + def cluster_cols(self): """ - Array of time variable. + The cluster variable(s). """ - if self.t_col is not None: - return self._t.values - else: - return None + return self._cluster_cols - @property - def s(self): - """ - Array of score or selection variable. - """ - if self.s_col is not None: - return self._s.values + @cluster_cols.setter + def cluster_cols(self, value): + reset_value = hasattr(self, "_cluster_cols") + if value is not None: + if isinstance(value, str): + value = [value] + if not isinstance(value, list): + raise TypeError( + "The cluster variable(s) cluster_cols must be of str or list type (or None). " + f"{str(value)} of type {str(type(value))} was passed." 
+ ) + if not len(set(value)) == len(value): + raise ValueError("Invalid cluster variable(s) cluster_cols: Contains duplicate values.") + if not set(value).issubset(set(self.all_variables)): + raise ValueError("Invalid cluster variable(s) cluster_cols. At least one cluster variable is no data column.") + self._cluster_cols = value else: - return None + self._cluster_cols = None + + if reset_value: + self._check_disjoint_sets() + if self.cluster_cols is not None: + self._set_cluster_vars() @property def n_treat(self): @@ -538,7 +540,7 @@ def y_col(self, value): self._y_col = value if reset_value: self._check_disjoint_sets() - self._set_y_z_t_s() + self._set_y_z() @property def z_cols(self): @@ -567,59 +569,30 @@ def z_cols(self, value): self._z_cols = value else: self._z_cols = None + if reset_value: self._check_disjoint_sets() - self._set_y_z_t_s() + self._set_y_z() @property - def t_col(self): + def n_cluster_vars(self): """ - The time variable. + The number of cluster variables. """ - return self._t_col - - @t_col.setter - def t_col(self, value): - reset_value = hasattr(self, "_t_col") - if value is not None: - if not isinstance(value, str): - raise TypeError( - "The time variable t_col must be of str type (or None). " - f"{str(value)} of type {str(type(value))} was passed." - ) - if value not in self.all_variables: - raise ValueError(f"Invalid time variable t_col. {value} is no data column.") - self._t_col = value + if self.cluster_cols is not None: + return len(self.cluster_cols) else: - self._t_col = None - if reset_value: - self._check_disjoint_sets() - self._set_y_z_t_s() + return 0 @property - def s_col(self): + def cluster_vars(self): """ - The score or selection variable. + Array of cluster variable(s). """ - return self._s_col - - @s_col.setter - def s_col(self, value): - reset_value = hasattr(self, "_s_col") - if value is not None: - if not isinstance(value, str): - raise TypeError( - "The score or selection variable s_col must be of str type (or None). " - f"{str(value)} of type {str(type(value))} was passed." - ) - if value not in self.all_variables: - raise ValueError(f"Invalid score or selection variable s_col. 
{value} is no data column.") - self._s_col = value + if self.cluster_cols is not None: + return self._cluster_vars.values else: - self._s_col = None - if reset_value: - self._check_disjoint_sets() - self._set_y_z_t_s() + return None @property def use_other_treat_as_covariate(self): @@ -684,7 +657,7 @@ def force_all_d_finite(self, value): # by default, we initialize to the first treatment variable self.set_x_d(self.d_cols[0]) - def _set_y_z_t_s(self): + def _set_y_z(self): def _set_attr(col): if col is None: return None @@ -693,8 +666,12 @@ def _set_attr(col): self._y = _set_attr(self.y_col) self._z = _set_attr(self.z_cols) - self._t = _set_attr(self.t_col) - self._s = _set_attr(self.s_col) + + def _set_cluster_vars(self): + """Set cluster variables.""" + if self.cluster_cols is not None: + assert_all_finite(self.data.loc[:, self.cluster_cols]) + self._cluster_vars = self.data.loc[:, self.cluster_cols] def set_x_d(self, treatment_var): """ @@ -728,40 +705,15 @@ def set_x_d(self, treatment_var): def _get_optional_col_sets(self): # this function can be extended in inherited subclasses z_cols_set = set(self.z_cols or []) - t_col_set = {self.t_col} if self.t_col else set() - s_col_set = {self.s_col} if self.s_col else set() - - return [z_cols_set, t_col_set, s_col_set] - - def _check_binary_treats(self): - is_binary = pd.Series(dtype=bool, index=self.d_cols) - if not self.force_all_d_finite: - is_binary[:] = False # if we allow infinite values, we cannot check for binary - else: - for treatment_var in self.d_cols: - this_d = self.data.loc[:, treatment_var] - binary_treat = type_of_target(this_d) == "binary" - zero_one_treat = np.all((np.power(this_d, 2) - this_d) == 0) - is_binary[treatment_var] = binary_treat & zero_one_treat - return is_binary - - def _check_binary_outcome(self): - y = self.data.loc[:, self.y_col] - binary_outcome = type_of_target(y) == "binary" - zero_one_outcome = np.all((np.power(y, 2) - y) == 0) - is_binary = binary_outcome & zero_one_outcome - return is_binary - - @staticmethod - def _check_disjoint(set1, set2, name1, arg1, name2, arg2): - """Helper method to check for disjoint sets.""" - if not set1.isdisjoint(set2): - raise ValueError(f"At least one variable/column is set as {name1} ({arg1}) and {name2} ({arg2}).") + cluster_cols_set = set(self.cluster_cols or []) + return [cluster_cols_set, z_cols_set] def _check_disjoint_sets(self): # this function can be extended in inherited subclasses self._check_disjoint_sets_y_d_x() - self._check_disjoint_sets_z_t_s() + self._check_disjoint_sets_z() + if self.cluster_cols is not None: + self._check_disjoint_sets_cluster_cols() def _check_disjoint_sets_y_d_x(self): y_col_set = {self.y_col} @@ -782,14 +734,12 @@ def _check_disjoint_sets_y_d_x(self): "(``x_cols``). Consider using parameter ``use_other_treat_as_covariate``." 
) - def _check_disjoint_sets_z_t_s(self): + def _check_disjoint_sets_z(self): y_col_set = {self.y_col} x_cols_set = set(self.x_cols) d_cols_set = set(self.d_cols) z_cols_set = set(self.z_cols or []) - t_col_set = {self.t_col} if self.t_col else set() - s_col_set = {self.s_col} if self.s_col else set() instrument_checks_args = [ (y_col_set, "outcome variable", "``y_col``"), @@ -801,12 +751,38 @@ def _check_disjoint_sets_z_t_s(self): set1=set1, name1=name, arg1=argument, set2=z_cols_set, name2="instrumental variable", arg2="``z_cols``" ) - time_check_args = instrument_checks_args + [(z_cols_set, "instrumental variable", "``z_cols``")] - for set1, name, argument in time_check_args: - self._check_disjoint(set1=set1, name1=name, arg1=argument, set2=t_col_set, name2="time variable", arg2="``t_col``") - - score_check_args = time_check_args + [(t_col_set, "time variable", "``t_col``")] - for set1, name, argument in score_check_args: + def _check_disjoint_sets_cluster_cols(self): + """Check that cluster columns are disjoint from other variable sets.""" + cluster_cols_set = set(self.cluster_cols) + y_col_set = {self.y_col} + x_cols_set = set(self.x_cols) + d_cols_set = set(self.d_cols) + z_cols_set = set(self.z_cols or []) + checks = [ + (y_col_set, "outcome variable", "``y_col``"), + (d_cols_set, "treatment variable", "``d_cols``"), + (x_cols_set, "covariate", "``x_cols``"), + (z_cols_set, "instrumental variable", "``z_cols``"), + ] + for set1, name, arg in checks: self._check_disjoint( - set1=set1, name1=name, arg1=argument, set2=s_col_set, name2="score or selection variable", arg2="``s_col``" + set1=set1, + name1=name, + arg1=arg, + set2=cluster_cols_set, + name2="cluster variable(s)", + arg2="``cluster_cols``", ) + + @property + def is_cluster_data(self): + """ + Flag indicating whether this data object is being used for cluster data. + """ + return self._is_cluster_data + + @is_cluster_data.setter + def is_cluster_data(self, value): + if not isinstance(value, bool): + raise TypeError(f"is_cluster_data must be True or False. 
Got {str(value)}.") + self._is_cluster_data = value diff --git a/doubleml/data/base_data_content.txt b/doubleml/data/base_data_content.txt new file mode 100644 index 0000000000000000000000000000000000000000..1ccdf7ca9a7ea2f43ee36364c7123001ff431e8c GIT binary patch literal 60862 zcmeI5|8EsXlECNZlkR_bXMVtF7boofvdW1Q93*>4$tJoW6p4!*1BS2xlf#ezyZP7e zu0B&Ps=BAUr{DNJFvPN8?DwXt>w8sK*Yy0~|2`g`4nGgihUdeJVKqFK-~M^{r{SUe z{Y<`D4F|)$;r{qdyzlz(AH$8|k0;+B%h=~~=Y6?89S-IHA7%7|5faeSIgW;xr?`MH{Bv1|p?$Q5$K7s1^9S=VZPh`#m!Qg1v6WA|gz9-|DbY37F%bifV zFZVx@pM9b9v5b2xpB-kD*CFo>zl`G_kJvHaGl2p3B9HD17Y>CdKaI0JA35}TbTmAY-+w$A@k~C0Z%6Vi(%K)c1P(ht9>}*ZMP@*FD*s20?8&IZ zaUL)|5a?yh;SRidDf1l4m_Nx(d}cHtEBkj}=Fq!W!_6@FSHqX0BYblxpEK9Nusf*U zTpB(b$FGEE{OpWBcK1p&s|<5BVh#6@*`bVd$@9r0`2>AouFJv$H1a384=p-AnZ@w7 zxgW^)4`tlG{Fo;2`|gPO6ZsA;H>f%02IcVx<4EXND?*+}0s%~p1dBs~XE1<=9`C*7 z%NL_=U^zATHV;;Yra=4c;kCdy96lfAf==uT7CUd}fm<&}99F^$Mm!s_tj)a=Y!%|3 z-23-&Brpx{OT%w(Vc4DVKKm?=&@sOX*4Qu>tu_etJ3=keFxu>z^3rDC6E3<99Lsq4 zurCy1z5gTE!%V9To?RpF?g%E>Q_8)a;g&#i+pf#$Gr{_X;Dfay5oC>)yc8Or$lv$n zQ>=NFV2!6c5UF5;b^4JmdWW?d^{e6P2w!!(rNiorQw^?Zl|kPZeDE#!gy*6II$H^k z{7(MH_wQC@c~9VCU0{!|*&Dtc?}uZLgxg_Op@jz4z_5MAv3tYcWL{z|GtdA0w{UHziQOLr($2trmpgvIQuS~p)Ter5^sYMKTpnkf$IRrP z>t4xs*!)W=a!-uDsQBd7eD_+GMBVj?-->qO@*&Koq3 z?=|W*D=A6m_L$juRhHgI?NbohSaA1(+^weIeO( zZehsx*{EUUHk$pCMUZt99mu`;pA3|hnL{T#*Kb-Y2%sLy5BWQq1%zGs`*@`AzI>uO zPtF3qa9lMn>e&VPRI}kH^6g_8i&bzZmPjlkCnJ|DYdP5*xL%U)jZ4sqhTDvq(I_8* z%AI5+`f$MRTn5{+J)fZ^<m5H6TIdDz*hDQLma^1wDh{Y{< z#R`bkYLCQhtz{mOYs=DZ3LZ#vS3HAP5K61~Jc6kAj-yVqPFFcZef})q{n_xt1V6+4 z2eBXgtYyd8B9=?8r0ZX76pK@@@pyP6+{7lWdeWLTR-;NQ_aBCfA{XqN|9=(88a4Ir zoCdFf?TwxQOGXo33w^+ua^V~KR;`^lir-1CzLcFtdMskDMz5daGf!QFWx7f1z-t-o zDb}w~w3hsVXrmh6f`#tA7&RSho%-YH9MqjEff@EGz z<|ivAZmDMn_Byygi|1O%+VsKrZr59)xhv6_AI3bK8Tp-AH7ZWI2Zwe~xTV!ZKG7V7 zu_j4=C))iwW(QQa+F*N~es z?xASIOTm!rMl(I;GacpksZB*{)u!zJWjc-L?c;Z}GShRr_EKZ_*Z92⁣89z1ajE zC7p<$sa~)jVAdG(DXrqGmbpf1RT3{r^^6QZLNw0Z8l#feF|jFp0kOyI9+_IUJVkSV$fOVV0F--6h4$s_}y#7o>}7UScw>>FJ#V@e1et!B5?5U8fnSt@$`I${=mNo zk%|>Klx3&27q{ZJ9+l{1M3mt)V^>+Wti0|?qeV}1vyvYr*G)ZFj5X%1K3qRq$|XFf zNwe$+qZO4c@!Yc#0r4c~p4H>|wmd5ylRA+5Oy;zjI==0Uuh-Iu?LKOK z3qnHgPUBYT)f^SLE2g5BzvlkBrtUn)F1$ zo7kxpYnN0!Zwjr{Lf3=3Cp>y0aQ9?B9gnTO7-MESpY4!P!+#`iM&Lob%QV+^)^X;z@x zMRZlNmFb)pCD$v>tCd?;OGwC9O~f_g|I_g^!>uMUdXKbY>!M(Bam*?&%4fA%*WspR z%<$8ywIxb+MQ_Qbwbzf_c60pnnQIjF@X?_tW;Jf^1>XA@b{}#IBOiP>H-3sIM0(Rc zyLgxR3@k@`?@Hey0qpaSi6+@tuP2tjk9EQF9kHT&^=+lBd~d9j&E1=>w7vc}H*RU2 z=jz+FVHx*QCYi6um%zxg(K#6Hx=GsI74;$Ci82ky_1QX0%^R_M+Sl#LY;i`L!Sr)a ztlZ?Nw&Z`e$3-pFk@1uE*r;>Q~S0JGun&7iJ!E`wq>u-+V`~KBONN1 z+K0_b@mO{US#fKpwZ-UKkDp6=sTC>>OGme<>#<;+zvs^N=gKX;K39Hcd#-({0U#lw z4i@FvD_JxBmM)i6!u4_U63%DM_pkcrO-J_3ta`XVQu{_CY?>vR-_hEQ(_XVzGF&*t zP8!uF+r3Hq?hUG49WFhB`amalrg*@v)vOa$hza+6B!Y;K3$jOyYQ@2 zsP$Ogh*}0a+*&EqI##Por>#v!3ts7gVD5BApR4^+N>zjxGZ(F(oOf406dU~TyQ?i9 zIPa?V-v6li<`?Yx`{}ASis44&i2DLtgXCT5o9eD^KTkmK_198KZ*|I00p=+lT}j1E zwbgrVlrsZ$=(+w#-RI8dihi|swi^-wyr!#tFFD+pSN6iaB~eiQw)c9}_UrTcVqV>v zZyGg>kK2vdH7#5Xe;BhQqp8nx^uN*HM)y{0}uH0H9pKb|!x&~stPA7rj=O){El3#s{DtS86!amr7%(sg*u|Q2G3g zSb^of<&mkpJ-?kBw{$%>R}1M`r!9ezufOJCwDo)anv7QkP5ySpKTp>Kd53CV)44f% zmxFO9%e0Q}5!Y3Eh1c)1HLT|PZSB6fzFWKRl~^-AWE~7knr`r}i%-ywVRm<^td@FB zrn|>Bo)y}5@v*gBx)z?spE{20@NXN(`3#4>6XTj)-<)7(zQuOfwRtJ(6}~mbljwK~ou+^{Rc9U>a^c646T5htzLq)cqd1V~kes!}$374~=Gg z^Y5b%(I{45;QsMr?MF0v(`Zo223#>J=>mnlYNXv@rT>bXcU~*KeqZCZH~OUV4vG{JH3wm(6`(3nBy_+JJVi( zO>`=Hy)y5t%dvR>9d?FMy{4wS4%G0-2w2zH6BwKk$FocA=dh^L>uRXhV`Vt#Og%7H z!@rDs5<7y6^+4NGH#<`2X|20#wz#UgGs4NwaO;6$(5C0T_0iEjv&x{`m0i;wxa(l9 
zJF(QI*2iV13mbi+6CJSA6KwU3Zum1wwstN*nZzk~J#>|ON!Ynm{W=;~F51U z^u(`ta&Wp=7@vUJAM4xs_a)C5pKZL_TRhosa ztvAxU_gJtcTf8D&S37c5&xp2b7d8FeoZ^6;v%cfp;$0azrTtRjj6TCaIhO9GdLPWX zwiUal=+@G%u}#@R9m|%5)JC1&*SGDaHgB&r)yB`w>d(8Z@896bvU%HjG}XytH z)h%o@zeBr8H*2r?961))ZhnvAvDBW+(CG5WJ>0yNbMAgVs#gx=oH@0;ZJ@^AK0jE3 z3J)JF_J1WBTbzkK#cG*%#nbK#-z~=ARe{T3U>?`57vvK=2bW3*&)2h0q^CRD^W1)x z$wT>uo?6XJ>7Atdl6HYxU46O(l|DglI9NBTQeJyE1g8oSX;l7ta-ep)>pM1fCGO%B3vsTfDN0k>y|Q`l6o5Zkbk{3z}=? zG{qR+_RS@31C0}LknFB-!|p-4hS&F_tPg~HWU5ysBPh?)_Q9v&P3nc_v?aaUvk8qZ zzHWh*{9ffQ=Sh_0Y?@$PikQrEGM%f&)zZB53ECM48XS zeP^_=_MK60+jpjXGi$ANZyRk(JLKB9lB4ljYe}pQFZH z6Q9c27VS3{yb?XLryy#T>-tX6b(w+H0ab%~pYXpGHTkBD;u%~{r@1e?H>=^R6G#ul z8o*jl(!zgvkInrtK4CAcru76h@b7(=vy97a!G1NaaDYRUx>D-h_QlGq-8)P}xSN_DYX}*(nvWvuCM2 z)PnhkVY@L(Z`XENC?8(H@w$WgKJMI)cu&|wubGV)VVwl zS(e$>JtH-=oV%9Q*tKtco6cdYk!8}#^9{?CPb0(I8jFu^9YE)CnAgiEmpPm*^gx4pc()`TkaDK5KaXHs404j7YUCC%P)|E%=N!Oi1 zj%YZv-+t&gEz@kZ6|z*bNbmn~*ihfsUjHJo`=+wbw54`=VzmzlBrzPDGCHs342heiI^|erG-d>sz3peYx{C$Nqd=Q^5IM z;oU94Kq2dS!>KQ-dC9sqd?py*7_%@s^3*n|@=V_w>husPYu>_WNjay!D&?3yKhLhV zK2Z)w?LMyh#!+3NSiQz`ZhZd|JA7As#WJpsD@^o(XVqCHlI`}`aP+bM#6ME>O4Ym! zoQw77re4!!Te6wHyrO?L^B;L+O?9Unz}fg;^J^qdy8I`2(s?^RyK>n zr-n@qF_joH;sh!?=PF97uWs}4yxq)vtn)3dbC%Y%)_tQ{JZ(hWP$p)$I z@xDN<_@F!boTk1#g#3;Zbm=4~pJa6vC2pEi$Dx+#rdp8OV813=9v4qjsd?l&mMu>C z48C?d3#&9Z=(wo|ooZxU4VJTvKEGO;?bD(zq!LfZEt7NW>_W4>^=1C8=reDFwdcp} z%?P~r;*or=bxB>{5eefn=loTB^9+%-r+Q?F;lBAv zqK9Sz;hT6_H z6-rO#;Jl_``AWRTn#NSCSBW-G`^k0h@VsgfIjX;>YwwtuoSmx0oV?HL6195I9OvdL z5Bd1$`r>5}eTlrUF#WN^$*oyfD8Hb)&>G-ngFu?s@O**IB!Do^7MQjPr(^tYJh97}KG zr_u$xv|bb;d4{|G%F1$kU;Eik^>m%YJe%D0j5MHcQRl8DyqeGWBP{2~_A~xCZ#(1P zT(0g`XTHKeDepU`Sy>Mzz59)E->CF-c$BGGN`CcRyk85+7TlJWX6XHUh-YQ*upFmT z`ED!gjBZw}JT7hXnwUJFmy%;yy)`nR7|_^j=m z!gKmf^)^KxwKSd+D=n{_lQ?gNv^YK&#jP6%-*y)Icv`X8J)G~$>&naRx87gUUQIsM-==4hFkz4|QCO3mAyrC1QDFyD*z`Wwn3 z1${eaai?|@9u^(by0q_Gqi0iJyXnW}C9 zH)gx}HTtckvBh9|gjrYPIW@x~4pp-Aj#T66>ZBUwf2Y5tYp89lGHuSNEq6Lso2S!- zO|LnPl9X>6Io|c!({P#RYK&nvs&<`b8K;v#d)3+xJNo2Sw$ejuImIzjh->(L?D-R&W_e#Mv01xSjd#metmQ7s_$19Vvs%9^Mx|ufvXc{Mh|$Wk z@cvvQ?YW75Tl?H=vz0Z!PixaTwP!F|TVHmZW3~y&UKicK3&v-_Be$pWT$U-nOPRXn z)UDwxKEt>4OaxyOtZ&)3WQFwmd5=)N`#H7Hs6k7mlEf~1HNlU4juCQ*4|K$V+d*08_&nEx- z%cw2&3Sa%%qfO}Wc56}>gudT%*Fw)}HEyBX@$Dbp8_AaV@E%T2{dLc%dEeXv#ihJ` zKiq>)+voOlO!W0xYgS0K)7|qA^>lZ7o8k#kJd3TT)kWI$bbq|MeOo?7eF`nF%2~`n zTRdJ^1bLtDTX1^8ZBr34F${gB51v_EaZMyVv#Y_)J!Q+SYV8OHa@uJQ*?gd4zjee#h>? 
z!dGLQzXDgZ6TGCM-^d;8-!eu|yDY!UA%9yif0y7*u`oXC``ERvR~l(rb^V01ysM&* zJCXmK`JyLb>*-6X6WqZW-|L^fq!!%XC92bm=Ji!FV<*wlZa(&v>bOjIj?!}%%KhMV zc>U)*wqPtj_puyVR9^KN>;2ArV`fdI<9GS1>rJz@90?_-T%>P30JeEA| z6ZzrnD=6ngOns94seF1X)=bDj#$c(aKaZ#wUaytntpm>%*TQ_d>88f4W&0bF8bT7DyXSsFc@LWg2~`(1D< zmH$!><(MzwklPDq5#*X!%W3OYQGUKW&hlONEIF63g=i0!C9ijuh~ZMplJSFhI$cFS z6knBQ#`tRTRyzyxmE6ranPB4{9S_bq7Mvx>_k5%039||?dn;$Aab;+$QkRUKMXbG< z@m|~Qyjxx0)K(yKv((1T&zP>UEpI+5d%-18{h2je#n?6XsWpkl9{c`YW8tm`rL+J<$ylExe3$>M3G(~5=e`Z~Sr0ekLEHqKrF{+@~jIkc{6xG&Pq z-*aml!_#Lo+10okWoP%Ep1URan{4dL%J3{mV49PZWau)995b1;=q=mV1(9=WmFO$N zG@J2wu5mnSi0<0CE=NpFtF*fHqiCIG7O_qUM!x#=6{o!;RV`SvnD$Du#1AM=v9*(j zQ#{qG()H(AT4APsEaGnVQ?s>5z1{n6Y;C9U-UjzRx*q@Jq(Y$YpS5VLF~q6^eKxZz z9*ce#_uu@@n`TY&D(k5aEbk$h=dkOJWG+CAeAOAKBi#@kr}E4mEcx_G{Is5CLB-`~ z!6B{ou0~i(YeLM6cP?qZ;X93u>*k_*CV*LZO?Rf=|KrOhO1ZxK#x&cB_fA(HC5hqP zv|8<_9mLF<_mZMNkH!pu>cZWo_cShZGxuR|Ky_Da9YWlXD z*j_qsj|IoMb1UwdJ=$eDd&%OJA5a#@wJ9xE4YVqxHN6+|7H1&q8&(g;-&JxhS&NP9 zNWER{F$J3>Nq7jJarT3B=}U zj-=Y6bLv=R7iUpfmh8u{O57?VvrnHuByEX3j+7YtE$uGv;3079vl|Kj1U$u7| z=}w8wPJFyK{7s@L{Y19zq~c~UbZti6!ZN&+5~45B`rxF?$F#(*^**L_w;|e6n`3Yy za^srd+T!qh2Y!3JO9&sPJ;v;b*W{JfDu8WXj}`cR85z$D0_LWkGo!lKre)fNb6YZ{ QZ9l&~hZ8Z|`HF1+ANk8VaR2}S literal 0 HcmV?d00001 diff --git a/doubleml/data/cluster_data.py b/doubleml/data/cluster_data.py index 89947b73..290c61f5 100644 --- a/doubleml/data/cluster_data.py +++ b/doubleml/data/cluster_data.py @@ -84,13 +84,11 @@ def __init__( use_other_treat_as_covariate=True, force_all_x_finite=True, ): - DoubleMLBaseData.__init__(self, data) - - # we need to set cluster_cols (needs _data) before call to the super __init__ because of the x_cols setter + DoubleMLBaseData.__init__(self, data) # we need to set cluster_cols (needs _data) before call to the super __init__ because of the x_cols setter self.cluster_cols = cluster_cols self._set_cluster_vars() DoubleMLData.__init__( - self, data, y_col, d_cols, x_cols, z_cols, t_col, s_col, use_other_treat_as_covariate, force_all_x_finite + self, data, y_col, d_cols, x_cols, z_cols, t_col, s_col, use_other_treat_as_covariate, force_all_x_finite, is_cluster_data=True ) self._check_disjoint_sets_cluster_cols() @@ -176,7 +174,7 @@ def from_arrays( >>> (x, y, d, cluster_vars, z) = make_pliv_multiway_cluster_CKMS2021(return_type='array') >>> obj_dml_data_from_array = DoubleMLClusterData.from_arrays(x, y, d, cluster_vars, z) """ - dml_data = DoubleMLData.from_arrays(x, y, d, z, t, s, use_other_treat_as_covariate, force_all_x_finite) + dml_data = DoubleMLData.from_arrays(x, y, d, z, t, s, use_other_treat_as_covariate, force_all_x_finite, is_cluster_data=True) cluster_vars = check_array(cluster_vars, ensure_2d=False, allow_nd=False) cluster_vars = _assure_2d_array(cluster_vars) if cluster_vars.shape[1] == 1: diff --git a/doubleml/data/did_data.py b/doubleml/data/did_data.py new file mode 100644 index 00000000..150aeb7d --- /dev/null +++ b/doubleml/data/did_data.py @@ -0,0 +1,272 @@ +import io +import numpy as np +import pandas as pd +from sklearn.utils.validation import check_array + +from doubleml.data.base_data import DoubleMLData +from doubleml.utils._estimation import _assure_2d_array + + +class DoubleMLDIDData(DoubleMLData): + """Double machine learning data-backend for Difference-in-Differences models. + + :class:`DoubleMLDIDData` objects can be initialized from + :class:`pandas.DataFrame`'s as well as :class:`numpy.ndarray`'s. + + Parameters + ---------- + data : :class:`pandas.DataFrame` + The data. + + y_col : str + The outcome variable. 
+ + d_cols : str or list + The treatment variable(s). + + t_col : str + The time variable for DiD models. + + x_cols : None, str or list + The covariates. + If ``None``, all variables (columns of ``data``) which are neither specified as outcome variable ``y_col``, nor + treatment variables ``d_cols``, nor instrumental variables ``z_cols``, nor time variable ``t_col`` are used as covariates. + Default is ``None``. + + z_cols : None, str or list + The instrumental variable(s). + Default is ``None``. + + cluster_cols : None, str or list + The cluster variable(s). + Default is ``None``. + + use_other_treat_as_covariate : bool + Indicates whether in the multiple-treatment case the other treatment variables should be added as covariates. + Default is ``True``. + + force_all_x_finite : bool or str + Indicates whether to raise an error on infinite values and / or missings in the covariates ``x``. + Possible values are: ``True`` (neither missings ``np.nan``, ``pd.NA`` nor infinite values ``np.inf`` are + allowed), ``False`` (missings and infinite values are allowed), ``'allow-nan'`` (only missings are allowed). + Note that the choice ``False`` and ``'allow-nan'`` are only reasonable if the machine learning methods used + for the nuisance functions are capable to provide valid predictions with missings and / or infinite values + in the covariates ``x``. + Default is ``True``. + + force_all_d_finite : bool + Indicates whether to raise an error on infinite values and / or missings in the treatment variables ``d``. + Default is ``True``. + + Examples + -------- >>> from doubleml import DoubleMLDIDData + >>> from doubleml.did.datasets import make_did_SZ2020 + >>> # initialization from pandas.DataFrame + >>> df = make_did_SZ2020(return_type='DataFrame') + >>> obj_dml_data_from_df = DoubleMLDIDData(df, 'y', 'd', 't') + >>> # initialization from np.ndarray + >>> (x, y, d, t) = make_did_SZ2020(return_type='array') + >>> obj_dml_data_from_array = DoubleMLDIDData.from_arrays(x, y, d, t=t) + """ + + def __init__( + self, + data, + y_col, + d_cols, + t_col, + x_cols=None, + z_cols=None, + cluster_cols=None, + use_other_treat_as_covariate=True, + force_all_x_finite=True, + force_all_d_finite=True, + ): + # Set time column before calling parent constructor + self.t_col = t_col + + # Call parent constructor + super().__init__( + data=data, + y_col=y_col, + d_cols=d_cols, + x_cols=x_cols, + z_cols=z_cols, + cluster_cols=cluster_cols, + use_other_treat_as_covariate=use_other_treat_as_covariate, + force_all_x_finite=force_all_x_finite, + force_all_d_finite=force_all_d_finite, + ) + + # Set time variable array after data is loaded + self._set_time_var() + + @classmethod + def from_arrays( + cls, + x, + y, + d, + t, + z=None, + cluster_vars=None, + use_other_treat_as_covariate=True, + force_all_x_finite=True, + force_all_d_finite=True, + ): + """ + Initialize :class:`DoubleMLDIDData` object from :class:`numpy.ndarray`'s. + + Parameters + ---------- + x : :class:`numpy.ndarray` + Array of covariates. + + y : :class:`numpy.ndarray` + Array of the outcome variable. + + d : :class:`numpy.ndarray` + Array of treatment variables. + + t : :class:`numpy.ndarray` + Array of the time variable for DiD models. + + z : None or :class:`numpy.ndarray` + Array of instrumental variables. + Default is ``None``. + + cluster_vars : None or :class:`numpy.ndarray` + Array of cluster variables. + Default is ``None``. 
+ + use_other_treat_as_covariate : bool + Indicates whether in the multiple-treatment case the other treatment variables should be added as covariates. + Default is ``True``. + + force_all_x_finite : bool or str + Indicates whether to raise an error on infinite values and / or missings in the covariates ``x``. + Possible values are: ``True`` (neither missings ``np.nan``, ``pd.NA`` nor infinite values ``np.inf`` are + allowed), ``False`` (missings and infinite values are allowed), ``'allow-nan'`` (only missings are allowed). + Note that the choice ``False`` and ``'allow-nan'`` are only reasonable if the machine learning methods used + for the nuisance functions are capable to provide valid predictions with missings and / or infinite values + in the covariates ``x``. + Default is ``True``. + + force_all_d_finite : bool + Indicates whether to raise an error on infinite values and / or missings in the treatment variables ``d``. + Default is ``True``. + + Examples + -------- >>> from doubleml import DoubleMLDIDData + >>> from doubleml.did.datasets import make_did_SZ2020 + >>> (x, y, d, t) = make_did_SZ2020(return_type='array') + >>> obj_dml_data_from_array = DoubleMLDIDData.from_arrays(x, y, d, t=t) + """ + # Prepare time variable + t = check_array(t, ensure_2d=False, allow_nd=False) + t = _assure_2d_array(t) + if t.shape[1] != 1: + raise ValueError("t must be a single column.") + t_col = "t" + + # Create base data using parent class method + base_data = DoubleMLData.from_arrays( + x, y, d, z, cluster_vars, use_other_treat_as_covariate, force_all_x_finite, force_all_d_finite + ) + + # Add time variable to the DataFrame + data = pd.concat((base_data.data, pd.DataFrame(t, columns=[t_col])), axis=1) + + return cls( + data, + base_data.y_col, + base_data.d_cols, + t_col, + base_data.x_cols, + base_data.z_cols, + base_data.cluster_cols, + base_data.use_other_treat_as_covariate, + base_data.force_all_x_finite, + base_data.force_all_d_finite, + ) + + @property + def t_col(self): + """ + The time variable. + """ + return self._t_col + + @t_col.setter + def t_col(self, value): + if not isinstance(value, str): + raise TypeError( + "The time variable t_col must be of str type. " + f"{str(value)} of type {str(type(value))} was passed." + ) + # Check if data exists (during initialization it might not) + if hasattr(self, '_data') and value not in self.all_variables: + raise ValueError("Invalid time variable t_col. The time variable is no data column.") + self._t_col = value + # Update time variable array if data is already loaded + if hasattr(self, '_data'): + self._set_time_var() + + @property + def t(self): + """ + Array of time variable. 
+ """ + return self._t.values + + def _get_optional_col_sets(self): + """Get optional column sets including time column.""" + base_optional_col_sets = super()._get_optional_col_sets() + t_col_set = {self.t_col} + return [t_col_set] + base_optional_col_sets + + def _check_disjoint_sets(self): + """Check that time column doesn't overlap with other variables.""" + # Apply standard checks from parent class + super()._check_disjoint_sets() + self._check_disjoint_sets_t_col() + + def _check_disjoint_sets_t_col(self): + """Check that time column is disjoint from other variable sets.""" + t_col_set = {self.t_col} + y_col_set = {self.y_col} + x_cols_set = set(self.x_cols) + d_cols_set = set(self.d_cols) + z_cols_set = set(self.z_cols or []) + cluster_cols_set = set(self.cluster_cols or []) + + t_checks_args = [ + (y_col_set, "outcome variable", "``y_col``"), + (d_cols_set, "treatment variable", "``d_cols``"), + (x_cols_set, "covariate", "``x_cols``"), + (z_cols_set, "instrumental variable", "``z_cols``"), + (cluster_cols_set, "cluster variable(s)", "``cluster_cols``"), + ] + for set1, name, argument in t_checks_args: + self._check_disjoint( + set1=set1, + name1=name, + arg1=argument, + set2=t_col_set, + name2="time variable", + arg2="``t_col``", + ) + + def _set_time_var(self): + """Set the time variable array.""" + if hasattr(self, '_data') and self.t_col in self.data.columns: + self._t = self.data.loc[:, [self.t_col]] + + def __str__(self): + """String representation.""" + data_summary = self._data_summary_str() + buf = io.StringIO() + print("================== DoubleMLDIDData Object ==================", file=buf) + print(f"Time variable: {self.t_col}", file=buf) + print(data_summary, file=buf) + return buf.getvalue() diff --git a/doubleml/data/panel_data.py b/doubleml/data/panel_data.py index f548ae6a..f34b2ee1 100644 --- a/doubleml/data/panel_data.py +++ b/doubleml/data/panel_data.py @@ -83,15 +83,15 @@ def __init__( x_cols=None, z_cols=None, use_other_treat_as_covariate=True, - force_all_x_finite=True, - datetime_unit="M", + force_all_x_finite=True, datetime_unit="M", ): DoubleMLBaseData.__init__(self, data) # we need to set id_col (needs _data) before call to the super __init__ because of the x_cols setter self.id_col = id_col self._datetime_unit = _is_valid_datetime_unit(datetime_unit) - self._set_id_var() + self._set_id_var() # Set t_col first before calling parent constructor + self.t_col = t_col DoubleMLData.__init__( self, @@ -100,8 +100,6 @@ def __init__( d_cols=d_cols, x_cols=x_cols, z_cols=z_cols, - t_col=t_col, - s_col=None, use_other_treat_as_covariate=use_other_treat_as_covariate, force_all_x_finite=force_all_x_finite, force_all_d_finite=False, @@ -110,6 +108,7 @@ def __init__( raise ValueError("Only one treatment column is allowed for panel data.") self._check_disjoint_sets_id_col() + self._set_t() # intialize the unique values of g and t self._g_values = np.sort(np.unique(self.d)) # unique values of g @@ -217,9 +216,7 @@ def n_obs(self): """ The number of observations. For panel data, the number of unique values for id_col. """ - return len(self._id_var_unique) - - @property + return len(self._id_var_unique) @property def g_col(self): """ The treatment variable indicating the time of treatment exposure. @@ -235,8 +232,7 @@ def d_cols(self, value): @property def g_values(self): """ - The unique values of the treatment variable (groups) ``d``. - """ + The unique values of the treatment variable (groups) ``d``. 
""" return self._g_values @property @@ -246,13 +242,36 @@ def n_groups(self): """ return len(self.g_values) - @DoubleMLData.t_col.setter + @property + def t_col(self): + """ + The time variable. + """ + return self._t_col + + @t_col.setter def t_col(self, value): if value is None: raise TypeError("Invalid time variable t_col. Time variable required for panel data.") - super(self.__class__, self.__class__).t_col.__set__(self, value) - if hasattr(self, "_t_values"): - self._t_values = np.sort(np.unique(self.t)) # update unique values of t + reset_value = hasattr(self, "_t_col") + if not isinstance(value, str): + raise TypeError( + f"The time variable t_col must be of str type. {str(value)} of type {str(type(value))} was passed." + ) + if value not in self.all_variables: + raise ValueError(f"Invalid time variable t_col. {value} is no data column.") + self._t_col = value + if reset_value: + self._check_disjoint_sets() + self._set_t() + if hasattr(self, "_t_values"): + self._t_values = np.sort(np.unique(self.t)) # update unique values of t + + def _set_t(self): + """Set time variable.""" + if self.t_col is not None: + assert_all_finite(self.data.loc[:, self.t_col]) + self._t = self.data.loc[:, self.t_col] @property def t_values(self): @@ -271,7 +290,8 @@ def n_t_periods(self): def _get_optional_col_sets(self): base_optional_col_sets = super()._get_optional_col_sets() id_col_set = {self.id_col} - return [id_col_set] + base_optional_col_sets + t_col_set = {self.t_col} # t_col is not None for panel data + return [id_col_set, t_col_set] + base_optional_col_sets def _check_disjoint_sets(self): # apply the standard checks from the DoubleMLData class diff --git a/doubleml/data/rdd_data.py b/doubleml/data/rdd_data.py new file mode 100644 index 00000000..3798dd7e --- /dev/null +++ b/doubleml/data/rdd_data.py @@ -0,0 +1,272 @@ +import io +import numpy as np +import pandas as pd +from sklearn.utils.validation import check_array + +from doubleml.data.base_data import DoubleMLData +from doubleml.utils._estimation import _assure_2d_array + + +class DoubleMLRDDData(DoubleMLData): + """Double machine learning data-backend for Regression Discontinuity Design models. + + :class:`DoubleMLRDDData` objects can be initialized from + :class:`pandas.DataFrame`'s as well as :class:`numpy.ndarray`'s. + + Parameters + ---------- + data : :class:`pandas.DataFrame` + The data. + + y_col : str + The outcome variable. + + d_cols : str or list + The treatment variable(s). + + s_col : str + The score/running variable for RDD models. + + x_cols : None, str or list + The covariates. + If ``None``, all variables (columns of ``data``) which are neither specified as outcome variable ``y_col``, nor + treatment variables ``d_cols``, nor instrumental variables ``z_cols``, nor score variable ``s_col`` are used as covariates. + Default is ``None``. + + z_cols : None, str or list + The instrumental variable(s). + Default is ``None``. + + cluster_cols : None, str or list + The cluster variable(s). + Default is ``None``. + + use_other_treat_as_covariate : bool + Indicates whether in the multiple-treatment case the other treatment variables should be added as covariates. + Default is ``True``. + + force_all_x_finite : bool or str + Indicates whether to raise an error on infinite values and / or missings in the covariates ``x``. 
+ Possible values are: ``True`` (neither missings ``np.nan``, ``pd.NA`` nor infinite values ``np.inf`` are + allowed), ``False`` (missings and infinite values are allowed), ``'allow-nan'`` (only missings are allowed). + Note that the choice ``False`` and ``'allow-nan'`` are only reasonable if the machine learning methods used + for the nuisance functions are capable to provide valid predictions with missings and / or infinite values + in the covariates ``x``. + Default is ``True``. + + force_all_d_finite : bool + Indicates whether to raise an error on infinite values and / or missings in the treatment variables ``d``. + Default is ``True``. + + Examples + -------- >>> from doubleml import DoubleMLRDDData + >>> from doubleml.rdd.datasets import make_rdd_data + >>> # initialization from pandas.DataFrame + >>> df = make_rdd_data(return_type='DataFrame') + >>> obj_dml_data_from_df = DoubleMLRDDData(df, 'y', 'd', 's') + >>> # initialization from np.ndarray + >>> (x, y, d, s) = make_rdd_data(return_type='array') + >>> obj_dml_data_from_array = DoubleMLRDDData.from_arrays(x, y, d, s=s) + """ + + def __init__( + self, + data, + y_col, + d_cols, + s_col, + x_cols=None, + z_cols=None, + cluster_cols=None, + use_other_treat_as_covariate=True, + force_all_x_finite=True, + force_all_d_finite=True, + ): + # Set score column before calling parent constructor + self.s_col = s_col + + # Call parent constructor + super().__init__( + data=data, + y_col=y_col, + d_cols=d_cols, + x_cols=x_cols, + z_cols=z_cols, + cluster_cols=cluster_cols, + use_other_treat_as_covariate=use_other_treat_as_covariate, + force_all_x_finite=force_all_x_finite, + force_all_d_finite=force_all_d_finite, + ) + + # Set score variable array after data is loaded + self._set_score_var() + + @classmethod + def from_arrays( + cls, + x, + y, + d, + s, + z=None, + cluster_vars=None, + use_other_treat_as_covariate=True, + force_all_x_finite=True, + force_all_d_finite=True, + ): + """ + Initialize :class:`DoubleMLRDDData` object from :class:`numpy.ndarray`'s. + + Parameters + ---------- + x : :class:`numpy.ndarray` + Array of covariates. + + y : :class:`numpy.ndarray` + Array of the outcome variable. + + d : :class:`numpy.ndarray` + Array of treatment variables. + + s : :class:`numpy.ndarray` + Array of the score/running variable for RDD models. + + z : None or :class:`numpy.ndarray` + Array of instrumental variables. + Default is ``None``. + + cluster_vars : None or :class:`numpy.ndarray` + Array of cluster variables. + Default is ``None``. + + use_other_treat_as_covariate : bool + Indicates whether in the multiple-treatment case the other treatment variables should be added as covariates. + Default is ``True``. + + force_all_x_finite : bool or str + Indicates whether to raise an error on infinite values and / or missings in the covariates ``x``. + Possible values are: ``True`` (neither missings ``np.nan``, ``pd.NA`` nor infinite values ``np.inf`` are + allowed), ``False`` (missings and infinite values are allowed), ``'allow-nan'`` (only missings are allowed). + Note that the choice ``False`` and ``'allow-nan'`` are only reasonable if the machine learning methods used + for the nuisance functions are capable to provide valid predictions with missings and / or infinite values + in the covariates ``x``. + Default is ``True``. + + force_all_d_finite : bool + Indicates whether to raise an error on infinite values and / or missings in the treatment variables ``d``. + Default is ``True``. 
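+
+        Notes
+        -----
+        The array ``s`` is appended to the assembled :class:`pandas.DataFrame` under the
+        column name ``'s'`` and set as the score/running variable of the returned object.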
+ + Examples + -------- >>> from doubleml import DoubleMLRDDData + >>> from doubleml.rdd.datasets import make_rdd_data + >>> (x, y, d, s) = make_rdd_data(return_type='array') + >>> obj_dml_data_from_array = DoubleMLRDDData.from_arrays(x, y, d, s=s) + """ + # Prepare score variable + s = check_array(s, ensure_2d=False, allow_nd=False) + s = _assure_2d_array(s) + if s.shape[1] != 1: + raise ValueError("s must be a single column.") + s_col = "s" + + # Create base data using parent class method + base_data = DoubleMLData.from_arrays( + x, y, d, z, cluster_vars, use_other_treat_as_covariate, force_all_x_finite, force_all_d_finite + ) + + # Add score variable to the DataFrame + data = pd.concat((base_data.data, pd.DataFrame(s, columns=[s_col])), axis=1) + + return cls( + data, + base_data.y_col, + base_data.d_cols, + s_col, + base_data.x_cols, + base_data.z_cols, + base_data.cluster_cols, + base_data.use_other_treat_as_covariate, + base_data.force_all_x_finite, + base_data.force_all_d_finite, + ) + + @property + def s_col(self): + """ + The score/running variable. + """ + return self._s_col + + @s_col.setter + def s_col(self, value): + if not isinstance(value, str): + raise TypeError( + "The score variable s_col must be of str type. " + f"{str(value)} of type {str(type(value))} was passed." + ) + # Check if data exists (during initialization it might not) + if hasattr(self, '_data') and value not in self.all_variables: + raise ValueError("Invalid score variable s_col. The score variable is no data column.") + self._s_col = value + # Update score variable array if data is already loaded + if hasattr(self, '_data'): + self._set_score_var() + + @property + def s(self): + """ + Array of score/running variable. + """ + return self._s.values + + def _get_optional_col_sets(self): + """Get optional column sets including score column.""" + base_optional_col_sets = super()._get_optional_col_sets() + s_col_set = {self.s_col} + return [s_col_set] + base_optional_col_sets + + def _check_disjoint_sets(self): + """Check that score column doesn't overlap with other variables.""" + # Apply standard checks from parent class + super()._check_disjoint_sets() + self._check_disjoint_sets_s_col() + + def _check_disjoint_sets_s_col(self): + """Check that score column is disjoint from other variable sets.""" + s_col_set = {self.s_col} + y_col_set = {self.y_col} + x_cols_set = set(self.x_cols) + d_cols_set = set(self.d_cols) + z_cols_set = set(self.z_cols or []) + cluster_cols_set = set(self.cluster_cols or []) + + s_checks_args = [ + (y_col_set, "outcome variable", "``y_col``"), + (d_cols_set, "treatment variable", "``d_cols``"), + (x_cols_set, "covariate", "``x_cols``"), + (z_cols_set, "instrumental variable", "``z_cols``"), + (cluster_cols_set, "cluster variable(s)", "``cluster_cols``"), + ] + for set1, name, argument in s_checks_args: + self._check_disjoint( + set1=set1, + name1=name, + arg1=argument, + set2=s_col_set, + name2="score variable", + arg2="``s_col``", + ) + + def _set_score_var(self): + """Set the score variable array.""" + if hasattr(self, '_data') and self.s_col in self.data.columns: + self._s = self.data.loc[:, [self.s_col]] + + def __str__(self): + """String representation.""" + data_summary = self._data_summary_str() + buf = io.StringIO() + print("================== DoubleMLRDDData Object ==================", file=buf) + print(f"Score variable: {self.s_col}", file=buf) + print(data_summary, file=buf) + return buf.getvalue() diff --git a/doubleml/data/ssm_data.py b/doubleml/data/ssm_data.py new file 
mode 100644 index 00000000..d8f3988e --- /dev/null +++ b/doubleml/data/ssm_data.py @@ -0,0 +1,274 @@ +import io +import numpy as np +import pandas as pd +from sklearn.utils.validation import check_array + +from doubleml.data.base_data import DoubleMLData +from doubleml.utils._estimation import _assure_2d_array + + +class DoubleMLSSMData(DoubleMLData): + """Double machine learning data-backend for Sample Selection Models. + + :class:`DoubleMLSSMData` objects can be initialized from + :class:`pandas.DataFrame`'s as well as :class:`numpy.ndarray`'s. + + Parameters + ---------- + data : :class:`pandas.DataFrame` + The data. + + y_col : str + The outcome variable. + + d_cols : str or list + The treatment variable(s). + + s_col : str + The selection variable for SSM models. + + x_cols : None, str or list + The covariates. + If ``None``, all variables (columns of ``data``) which are neither specified as outcome variable ``y_col``, nor + treatment variables ``d_cols``, nor instrumental variables ``z_cols``, nor selection variable ``s_col`` are used as covariates. + Default is ``None``. + + z_cols : None, str or list + The instrumental variable(s). + Default is ``None``. + + cluster_cols : None, str or list + The cluster variable(s). + Default is ``None``. + + use_other_treat_as_covariate : bool + Indicates whether in the multiple-treatment case the other treatment variables should be added as covariates. + Default is ``True``. + + force_all_x_finite : bool or str + Indicates whether to raise an error on infinite values and / or missings in the covariates ``x``. + Possible values are: ``True`` (neither missings ``np.nan``, ``pd.NA`` nor infinite values ``np.inf`` are + allowed), ``False`` (missings and infinite values are allowed), ``'allow-nan'`` (only missings are allowed). + Note that the choice ``False`` and ``'allow-nan'`` are only reasonable if the machine learning methods used + for the nuisance functions are capable to provide valid predictions with missings and / or infinite values + in the covariates ``x``. + Default is ``True``. + + force_all_d_finite : bool + Indicates whether to raise an error on infinite values and / or missings in the treatment variables ``d``. + Default is ``True``. 
+ + Examples + -------- + >>> from doubleml import DoubleMLSSMData + >>> from doubleml.irm.datasets import make_ssm_data + >>> # initialization from pandas.DataFrame + >>> df = make_ssm_data(return_type='DataFrame') + >>> obj_dml_data_from_df = DoubleMLSSMData(df, 'y', 'd', 's') + >>> # initialization from np.ndarray + >>> (x, y, d, s) = make_ssm_data(return_type='array') + >>> obj_dml_data_from_array = DoubleMLSSMData.from_arrays(x, y, d, s=s) + """ + + def __init__( + self, + data, + y_col, + d_cols, + s_col, + x_cols=None, + z_cols=None, + cluster_cols=None, + use_other_treat_as_covariate=True, + force_all_x_finite=True, + force_all_d_finite=True, + ): + # Set selection column before calling parent constructor + self.s_col = s_col + + # Call parent constructor + super().__init__( + data=data, + y_col=y_col, + d_cols=d_cols, + x_cols=x_cols, + z_cols=z_cols, + cluster_cols=cluster_cols, + use_other_treat_as_covariate=use_other_treat_as_covariate, + force_all_x_finite=force_all_x_finite, + force_all_d_finite=force_all_d_finite, + ) + + # Set selection variable array after data is loaded + self._set_selection_var() + + @classmethod + def from_arrays( + cls, + x, + y, + d, + s, + z=None, + cluster_vars=None, + use_other_treat_as_covariate=True, + force_all_x_finite=True, + force_all_d_finite=True, + ): + """ + Initialize :class:`DoubleMLSSMData` object from :class:`numpy.ndarray`'s. + + Parameters + ---------- + x : :class:`numpy.ndarray` + Array of covariates. + + y : :class:`numpy.ndarray` + Array of the outcome variable. + + d : :class:`numpy.ndarray` + Array of treatment variables. + + s : :class:`numpy.ndarray` + Array of the selection variable for SSM models. + + z : None or :class:`numpy.ndarray` + Array of instrumental variables. + Default is ``None``. + + cluster_vars : None or :class:`numpy.ndarray` + Array of cluster variables. + Default is ``None``. + + use_other_treat_as_covariate : bool + Indicates whether in the multiple-treatment case the other treatment variables should be added as covariates. + Default is ``True``. + + force_all_x_finite : bool or str + Indicates whether to raise an error on infinite values and / or missings in the covariates ``x``. + Possible values are: ``True`` (neither missings ``np.nan``, ``pd.NA`` nor infinite values ``np.inf`` are + allowed), ``False`` (missings and infinite values are allowed), ``'allow-nan'`` (only missings are allowed). + Note that the choice ``False`` and ``'allow-nan'`` are only reasonable if the machine learning methods used + for the nuisance functions are capable to provide valid predictions with missings and / or infinite values + in the covariates ``x``. + Default is ``True``. + + force_all_d_finite : bool + Indicates whether to raise an error on infinite values and / or missings in the treatment variables ``d``. + Default is ``True``. 
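+
+        Notes
+        -----
+        The array ``s`` is appended to the assembled :class:`pandas.DataFrame` under the
+        column name ``'s'`` and set as the selection variable of the returned object.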
+ + Examples + -------- + >>> from doubleml import DoubleMLSSMData + >>> from doubleml.irm.datasets import make_ssm_data + >>> (x, y, d, s) = make_ssm_data(return_type='array') + >>> obj_dml_data_from_array = DoubleMLSSMData.from_arrays(x, y, d, s=s) + """ + # Prepare selection variable + s = check_array(s, ensure_2d=False, allow_nd=False) + s = _assure_2d_array(s) + if s.shape[1] != 1: + raise ValueError("s must be a single column.") + s_col = "s" + + # Create base data using parent class method + base_data = DoubleMLData.from_arrays( + x, y, d, z, cluster_vars, use_other_treat_as_covariate, force_all_x_finite, force_all_d_finite + ) + + # Add selection variable to the DataFrame + data = pd.concat((base_data.data, pd.DataFrame(s, columns=[s_col])), axis=1) + + return cls( + data, + base_data.y_col, + base_data.d_cols, + s_col, + base_data.x_cols, + base_data.z_cols, + base_data.cluster_cols, + base_data.use_other_treat_as_covariate, + base_data.force_all_x_finite, + base_data.force_all_d_finite, + ) + + @property + def s_col(self): + """ + The selection variable. + """ + return self._s_col + + @s_col.setter + def s_col(self, value): + if not isinstance(value, str): + raise TypeError( + "The selection variable s_col must be of str type. " + f"{str(value)} of type {str(type(value))} was passed." + ) + # Check if data exists (during initialization it might not) + if hasattr(self, '_data') and value not in self.all_variables: + raise ValueError("Invalid selection variable s_col. The selection variable is no data column.") + self._s_col = value + # Update selection variable array if data is already loaded + if hasattr(self, '_data'): + self._set_selection_var() + + @property + def s(self): + """ + Array of selection variable. + """ + return self._s.values + + def _get_optional_col_sets(self): + """Get optional column sets including selection column.""" + base_optional_col_sets = super()._get_optional_col_sets() + s_col_set = {self.s_col} + return [s_col_set] + base_optional_col_sets + + def _check_disjoint_sets(self): + """Check that selection column doesn't overlap with other variables.""" + # Apply standard checks from parent class + super()._check_disjoint_sets() + self._check_disjoint_sets_s_col() + + def _check_disjoint_sets_s_col(self): + """Check that selection column is disjoint from other variable sets.""" + s_col_set = {self.s_col} + y_col_set = {self.y_col} + x_cols_set = set(self.x_cols) + d_cols_set = set(self.d_cols) + z_cols_set = set(self.z_cols or []) + cluster_cols_set = set(self.cluster_cols or []) + + s_checks_args = [ + (y_col_set, "outcome variable", "``y_col``"), + (d_cols_set, "treatment variable", "``d_cols``"), + (x_cols_set, "covariate", "``x_cols``"), + (z_cols_set, "instrumental variable", "``z_cols``"), + (cluster_cols_set, "cluster variable(s)", "``cluster_cols``"), + ] + for set1, name, argument in s_checks_args: + self._check_disjoint( + set1=set1, + name1=name, + arg1=argument, + set2=s_col_set, + name2="selection variable", + arg2="``s_col``", + ) + + def _set_selection_var(self): + """Set the selection variable array.""" + if hasattr(self, '_data') and self.s_col in self.data.columns: + self._s = self.data.loc[:, [self.s_col]] + + def __str__(self): + """String representation.""" + data_summary = self._data_summary_str() + buf = io.StringIO() + print("================== DoubleMLSSMData Object ==================", file=buf) + print(f"Selection variable: {self.s_col}", file=buf) + print(data_summary, file=buf) + return buf.getvalue() diff --git 
a/doubleml/data/tests/test_cluster_data.py b/doubleml/data/tests/test_cluster_data.py index b02a3275..09a45ccd 100644 --- a/doubleml/data/tests/test_cluster_data.py +++ b/doubleml/data/tests/test_cluster_data.py @@ -2,20 +2,22 @@ import pandas as pd import pytest -from doubleml import DoubleMLClusterData +from doubleml import DoubleMLData from doubleml.plm.datasets import make_pliv_multiway_cluster_CKMS2021, make_plr_CCDDHNR2018 @pytest.mark.ci def test_obj_vs_from_arrays(): np.random.seed(3141) - dml_data = make_pliv_multiway_cluster_CKMS2021(N=10, M=10) - dml_data_from_array = DoubleMLClusterData.from_arrays( + (x, y, d, cluster_vars, z) = make_pliv_multiway_cluster_CKMS2021(N=10, M=10, return_type="array") + dml_data = DoubleMLData.from_arrays(x, y, d, z=z, cluster_vars=cluster_vars, is_cluster_data=True) + dml_data_from_array = DoubleMLData.from_arrays( dml_data.data[dml_data.x_cols], dml_data.data[dml_data.y_col], dml_data.data[dml_data.d_cols], - dml_data.data[dml_data.cluster_cols], - dml_data.data[dml_data.z_cols], + z=dml_data.data[dml_data.z_cols], + cluster_vars=dml_data.data[dml_data.cluster_cols], + is_cluster_data=True ) df = dml_data.data.copy() df.rename( @@ -24,12 +26,13 @@ def test_obj_vs_from_arrays(): assert dml_data_from_array.data.equals(df) # with a single cluster variable - dml_data_from_array = DoubleMLClusterData.from_arrays( + dml_data_from_array = DoubleMLData.from_arrays( dml_data.data[dml_data.x_cols], dml_data.data[dml_data.y_col], dml_data.data[dml_data.d_cols], - dml_data.data[dml_data.cluster_cols[1]], - dml_data.data[dml_data.z_cols], + z=dml_data.data[dml_data.z_cols], + cluster_vars=dml_data.data[dml_data.cluster_cols[1]], + is_cluster_data=True ) df = dml_data.data.copy().drop(columns="cluster_var_i") df.rename(columns={"cluster_var_j": "cluster_var", "Y": "y", "D": "d", "Z": "z"}, inplace=True) @@ -39,7 +42,7 @@ def test_obj_vs_from_arrays(): @pytest.mark.ci def test_x_cols_setter_defaults_w_cluster(): df = pd.DataFrame(np.tile(np.arange(6), (6, 1)), columns=["yy", "dd", "xx1", "xx2", "xx3", "cluster1"]) - dml_data = DoubleMLClusterData(df, y_col="yy", d_cols="dd", cluster_cols="cluster1") + dml_data = DoubleMLData(df, y_col="yy", d_cols="dd", cluster_cols="cluster1", is_cluster_data=True) assert dml_data.x_cols == ["xx1", "xx2", "xx3"] dml_data.x_cols = ["xx1", "xx3"] assert dml_data.x_cols == ["xx1", "xx3"] @@ -48,48 +51,53 @@ def test_x_cols_setter_defaults_w_cluster(): # with instrument df = pd.DataFrame(np.tile(np.arange(6), (6, 1)), columns=["yy", "dd", "xx1", "xx2", "z", "cluster1"]) - dml_data = DoubleMLClusterData(df, y_col="yy", d_cols="dd", cluster_cols="cluster1", z_cols="z") + dml_data = DoubleMLData(df, y_col="yy", d_cols="dd", cluster_cols="cluster1", z_cols="z", is_cluster_data=True) assert dml_data.x_cols == ["xx1", "xx2"] # without instrument and with time df = pd.DataFrame(np.tile(np.arange(6), (6, 1)), columns=["yy", "dd", "xx1", "xx2", "tt", "cluster1"]) - dml_data = DoubleMLClusterData(df, y_col="yy", d_cols="dd", cluster_cols="cluster1", t_col="tt") - assert dml_data.x_cols == ["xx1", "xx2"] + dml_data = DoubleMLData(df, y_col="yy", d_cols="dd", cluster_cols="cluster1", is_cluster_data=True) + assert dml_data.x_cols == ["xx1", "xx2", "tt"] # with instrument and with time df = pd.DataFrame(np.tile(np.arange(7), (6, 1)), columns=["yy", "dd", "xx1", "xx2", "zz", "tt", "cluster1"]) - dml_data = DoubleMLClusterData(df, y_col="yy", d_cols="dd", cluster_cols="cluster1", z_cols="zz", t_col="tt") - assert dml_data.x_cols == ["xx1", 
"xx2"] + dml_data = DoubleMLData(df, y_col="yy", d_cols="dd", cluster_cols="cluster1", z_cols="zz", is_cluster_data=True) + assert dml_data.x_cols == ["xx1", "xx2", "tt"] # without instrument and with selection df = pd.DataFrame(np.tile(np.arange(6), (6, 1)), columns=["yy", "dd", "xx1", "xx2", "ss", "cluster1"]) - dml_data = DoubleMLClusterData(df, y_col="yy", d_cols="dd", cluster_cols="cluster1", s_col="ss") - assert dml_data.x_cols == ["xx1", "xx2"] + dml_data = DoubleMLData(df, y_col="yy", d_cols="dd", cluster_cols="cluster1", is_cluster_data=True) + assert dml_data.x_cols == ["xx1", "xx2", "ss"] # with instrument and with selection df = pd.DataFrame(np.tile(np.arange(7), (6, 1)), columns=["yy", "dd", "xx1", "xx2", "zz", "ss", "cluster1"]) - dml_data = DoubleMLClusterData(df, y_col="yy", d_cols="dd", cluster_cols="cluster1", z_cols="zz", s_col="ss") - assert dml_data.x_cols == ["xx1", "xx2"] + dml_data = DoubleMLData(df, y_col="yy", d_cols="dd", cluster_cols="cluster1", z_cols="zz", is_cluster_data=True) + assert dml_data.x_cols == ["xx1", "xx2", "ss"] # without instrument with time with selection df = pd.DataFrame(np.tile(np.arange(7), (6, 1)), columns=["yy", "dd", "xx1", "xx2", "tt", "ss", "cluster1"]) - dml_data = DoubleMLClusterData(df, y_col="yy", d_cols="dd", cluster_cols="cluster1", t_col="tt", s_col="ss") - assert dml_data.x_cols == ["xx1", "xx2"] + dml_data = DoubleMLData(df, y_col="yy", d_cols="dd", cluster_cols="cluster1", is_cluster_data=True) + assert dml_data.x_cols == ["xx1", "xx2", "tt", "ss"] # with instrument with time with selection df = pd.DataFrame(np.tile(np.arange(8), (6, 1)), columns=["yy", "dd", "xx1", "xx2", "zz", "tt", "ss", "cluster1"]) - dml_data = DoubleMLClusterData(df, y_col="yy", d_cols="dd", cluster_cols="cluster1", z_cols="zz", t_col="tt", s_col="ss") - assert dml_data.x_cols == ["xx1", "xx2"] + dml_data = DoubleMLData(df, y_col="yy", d_cols="dd", cluster_cols="cluster1", z_cols="zz", is_cluster_data=True) + assert dml_data.x_cols == ["xx1", "xx2", "tt", "ss"] @pytest.mark.ci def test_cluster_cols_setter(): np.random.seed(3141) - dml_data = make_plr_CCDDHNR2018(n_obs=100) - df = dml_data.data.copy().iloc[:, :10] - df.columns = [f"X{i + 1}" for i in np.arange(7)] + ["y", "d1", "d2"] - dml_data = DoubleMLClusterData( - df, "y", ["d1", "d2"], cluster_cols=[f"X{i + 1}" for i in [5, 6]], x_cols=[f"X{i + 1}" for i in np.arange(5)] + (x, y, d) = make_plr_CCDDHNR2018(n_obs=100, return_type="array") + # Create a pandas DataFrame with X, y, and d columns + df = pd.DataFrame(np.column_stack((x[:, :7], y, d)), + columns=[f"X{i + 1}" for i in np.arange(7)] + ["y", "d1", "d2"]) + + dml_data = DoubleMLData( + df, "y", ["d1", "d2"], + x_cols=[f"X{i + 1}" for i in np.arange(5)], + cluster_cols=[f"X{i + 1}" for i in [5, 6]], + is_cluster_data=True ) cluster_vars = df[["X6", "X7"]].values @@ -129,56 +137,49 @@ def test_disjoint_sets(): r"and cluster variable\(s\) \(``cluster_cols``\)." ) with pytest.raises(ValueError, match=msg): - _ = DoubleMLClusterData(df, y_col="yy", d_cols=["dd1"], x_cols=["xx1", "xx2"], cluster_cols="yy") + _ = DoubleMLData(df, y_col="yy", d_cols=["dd1"], x_cols=["xx1", "xx2"], cluster_cols="yy", is_cluster_data=True) msg = ( r"At least one variable/column is set as treatment variable \(``d_cols``\) " r"and cluster variable\(s\) \(``cluster_cols``\)." 
) with pytest.raises(ValueError, match=msg): - _ = DoubleMLClusterData(df, y_col="yy", d_cols=["dd1"], x_cols=["xx1", "xx2"], cluster_cols="dd1") + _ = DoubleMLData(df, y_col="yy", d_cols=["dd1"], x_cols=["xx1", "xx2"], cluster_cols="dd1", is_cluster_data=True) msg = ( r"At least one variable/column is set as covariate \(``x_cols``\) " r"and cluster variable\(s\) \(``cluster_cols``\)." ) with pytest.raises(ValueError, match=msg): - _ = DoubleMLClusterData(df, y_col="yy", d_cols=["dd1"], x_cols=["xx1", "xx2"], cluster_cols="xx2") + _ = DoubleMLData(df, y_col="yy", d_cols=["dd1"], x_cols=["xx1", "xx2"], cluster_cols="xx2", is_cluster_data=True) msg = ( r"At least one variable/column is set as instrumental variable \(``z_cols``\) " r"and cluster variable\(s\) \(``cluster_cols``\)." ) with pytest.raises(ValueError, match=msg): - _ = DoubleMLClusterData(df, y_col="yy", d_cols=["dd1"], x_cols=["xx1"], z_cols=["xx2"], cluster_cols="xx2") - - msg = ( - r"At least one variable/column is set as time variable \(``t_col``\) " - r"and cluster variable\(s\) \(``cluster_cols``\)." - ) - with pytest.raises(ValueError, match=msg): - _ = DoubleMLClusterData(df, y_col="yy", d_cols=["dd1"], x_cols=["xx1"], t_col="xx2", cluster_cols="xx2") - - msg = ( - r"At least one variable/column is set as score or selection variable \(``s_col``\) " - r"and cluster variable\(s\) \(``cluster_cols``\)." - ) - with pytest.raises(ValueError, match=msg): - _ = DoubleMLClusterData(df, y_col="yy", d_cols=["dd1"], x_cols=["xx1"], s_col="xx2", cluster_cols="xx2") + _ = DoubleMLData(df, y_col="yy", d_cols=["dd1"], x_cols=["xx1"], z_cols=["xx2"], cluster_cols="xx2", is_cluster_data=True) @pytest.mark.ci def test_duplicates(): np.random.seed(3141) - dml_cluster_data = make_pliv_multiway_cluster_CKMS2021(N=10, M=10) + (x, y, d, cluster_vars, z) = make_pliv_multiway_cluster_CKMS2021(N=10, M=10, return_type="array") + df = pd.DataFrame(np.column_stack((x, y, d, z)), + columns=[f"X{i+1}" for i in range(x.shape[1])] + ["Y", "D", "Z"]) + cluster_df = pd.DataFrame(cluster_vars, columns=["cluster_var_i", "cluster_var_j"]) + data = pd.concat([df, cluster_df], axis=1) msg = r"Invalid cluster variable\(s\) cluster_cols: Contains duplicate values." with pytest.raises(ValueError, match=msg): - _ = DoubleMLClusterData(dml_cluster_data.data, y_col="y", d_cols=["d"], cluster_cols=["X3", "X2", "X3"]) + _ = DoubleMLData(data, y_col="Y", d_cols=["D"], cluster_cols=["X3", "X2", "X3"], is_cluster_data=True) + + dml_data = DoubleMLData(data, y_col="Y", d_cols=["D"], cluster_cols=["X3", "X2"], is_cluster_data=True) with pytest.raises(ValueError, match=msg): - dml_cluster_data.cluster_cols = ["X3", "X2", "X3"] + dml_data.cluster_cols = ["X3", "X2", "X3"] msg = "Invalid pd.DataFrame: Contains duplicate column names." 
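+    # the DataFrame below lists the column name "y" twice, which has to be rejected at construction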
with pytest.raises(ValueError, match=msg): - _ = DoubleMLClusterData( - pd.DataFrame(np.zeros((100, 5)), columns=["y", "d", "X3", "X2", "y"]), y_col="y", d_cols=["d"], cluster_cols=["X2"] + _ = DoubleMLData( + pd.DataFrame(np.zeros((100, 5)), columns=["y", "d", "X3", "X2", "y"]), + y_col="y", d_cols=["d"], cluster_cols=["X2"], is_cluster_data=True ) @@ -186,45 +187,29 @@ def test_duplicates(): def test_dml_datatype(): data_array = np.zeros((100, 10)) with pytest.raises(TypeError): - _ = DoubleMLClusterData(data_array, y_col="y", d_cols=["d"], cluster_cols=["X3", "X2"]) + _ = DoubleMLData(data_array, y_col="y", d_cols=["d"], cluster_cols=["X3", "X2"], is_cluster_data=True) @pytest.mark.ci def test_cluster_data_str(): np.random.seed(3141) - dml_data = make_pliv_multiway_cluster_CKMS2021(N=10, M=10) + (x, y, d, cluster_vars, z) = make_pliv_multiway_cluster_CKMS2021(N=10, M=10, return_type="array") + dml_data = DoubleMLData.from_arrays(x, y, d, z=z, cluster_vars=cluster_vars, is_cluster_data=True) # Convert the object to string dml_str = str(dml_data) # Check that all important sections are present in the string - assert "================== DoubleMLClusterData Object ==================" in dml_str + assert "================== DoubleMLData Object ==================" in dml_str assert "------------------ Data summary ------------------" in dml_str assert "------------------ DataFrame info ------------------" in dml_str # Check that specific data attributes are correctly included - assert "Outcome variable: Y" in dml_str - assert "Treatment variable(s): ['D']" in dml_str - assert "Cluster variable(s): ['cluster_var_i', 'cluster_var_j']" in dml_str + assert "Outcome variable: y" in dml_str + assert "Treatment variable(s): ['d']" in dml_str + assert "Cluster variable(s): ['cluster_var1', 'cluster_var2']" in dml_str assert "Covariates: " in dml_str - assert "Instrument variable(s): ['Z']" in dml_str - assert "No. Observations:" in dml_str - - # Test with additional optional attributes - df = dml_data.data.copy() - df["time_var"] = 1 - df["score_var"] = 0.5 - - dml_data_with_optional = DoubleMLClusterData( - data=df, - y_col="Y", - d_cols="D", - cluster_cols=["cluster_var_i", "cluster_var_j"], - z_cols="Z", - t_col="time_var", - s_col="score_var", - ) - - dml_str_optional = str(dml_data_with_optional) - assert "Time variable: time_var" in dml_str_optional - assert "Score/Selection variable: score_var" in dml_str_optional + assert "Instrument variable(s): ['z']" in dml_str + assert "Is cluster data: True" in dml_str + assert "No. 
Observations:" in dml_str # There's no TimeData or ScoreData here anymore, so the test is complete + # The specialized data classes will be tested in their own test files diff --git a/doubleml/plm/datasets/dgp_pliv_multiway_cluster_CKMS2021.py b/doubleml/plm/datasets/dgp_pliv_multiway_cluster_CKMS2021.py index df2b4cbe..a882c678 100644 --- a/doubleml/plm/datasets/dgp_pliv_multiway_cluster_CKMS2021.py +++ b/doubleml/plm/datasets/dgp_pliv_multiway_cluster_CKMS2021.py @@ -2,7 +2,7 @@ import pandas as pd from scipy.linalg import toeplitz -from doubleml.data import DoubleMLClusterData +from doubleml.data import DoubleMLData from doubleml.utils._aliases import _array_alias, _data_frame_alias, _dml_cluster_data_alias @@ -184,9 +184,7 @@ def make_pliv_multiway_cluster_CKMS2021(N=25, M=25, dim_X=100, theta=1.0, return y = d * theta + np.matmul(x, zeta_0) + eps cluster_cols = ["cluster_var_i", "cluster_var_j"] - cluster_vars = pd.MultiIndex.from_product([range(N), range(M)]).to_frame(name=cluster_cols).reset_index(drop=True) - - if return_type in _array_alias: + cluster_vars = pd.MultiIndex.from_product([range(N), range(M)]).to_frame(name=cluster_cols).reset_index(drop=True) if return_type in _array_alias: return x, y, d, cluster_vars.values, z elif return_type in _data_frame_alias + _dml_cluster_data_alias: x_cols = [f"X{i + 1}" for i in np.arange(dim_X)] @@ -194,6 +192,6 @@ def make_pliv_multiway_cluster_CKMS2021(N=25, M=25, dim_X=100, theta=1.0, return if return_type in _data_frame_alias: return data else: - return DoubleMLClusterData(data, "Y", "D", cluster_cols, x_cols, "Z") + return DoubleMLData(data, "Y", "D", x_cols, "Z", cluster_cols, is_cluster_data=True) else: raise ValueError("Invalid return_type.") diff --git a/doubleml/tests/test_multiway_cluster.py b/doubleml/tests/test_multiway_cluster.py index 10e5d445..c3425239 100644 --- a/doubleml/tests/test_multiway_cluster.py +++ b/doubleml/tests/test_multiway_cluster.py @@ -18,9 +18,10 @@ M = 25 # number of observations (second dimension) dim_x = 100 # dimension of x -obj_dml_cluster_data = make_pliv_multiway_cluster_CKMS2021(N, M, dim_x) +(x, y, d, cluster_vars, z) = make_pliv_multiway_cluster_CKMS2021(N, M, dim_x, return_type="array") +obj_dml_cluster_data = dml.DoubleMLData.from_arrays(x, y, d, z=z, cluster_vars=cluster_vars, is_cluster_data=True) -obj_dml_oneway_cluster_data = make_pliv_multiway_cluster_CKMS2021( +(x, y, d, cluster_vars, z) = make_pliv_multiway_cluster_CKMS2021( N, M, dim_x, @@ -28,9 +29,11 @@ omega_epsilon=np.array([0.25, 0]), omega_v=np.array([0.25, 0]), omega_V=np.array([0.25, 0]), + return_type="array" ) +obj_dml_oneway_cluster_data = dml.DoubleMLData.from_arrays(x, y, d, z=z, cluster_vars=cluster_vars, is_cluster_data=True) # only the first cluster variable is relevant with the weight setting above -obj_dml_oneway_cluster_data.cluster_cols = "cluster_var_i" +obj_dml_oneway_cluster_data.cluster_cols = "cluster_var1" @pytest.fixture( diff --git a/doubleml/tests/test_nonlinear_cluster.py b/doubleml/tests/test_nonlinear_cluster.py index 71998941..9a2c585a 100644 --- a/doubleml/tests/test_nonlinear_cluster.py +++ b/doubleml/tests/test_nonlinear_cluster.py @@ -7,7 +7,7 @@ from sklearn.linear_model import Lasso, LinearRegression import doubleml as dml -from doubleml import DoubleMLClusterData +from doubleml import DoubleMLData from doubleml.plm.datasets import make_pliv_multiway_cluster_CKMS2021 from .test_nonlinear_score_mixin import DoubleMLPLRWithNonLinearScoreMixin @@ -20,7 +20,7 @@ # create data without insturment for 
plr x, y, d, cluster_vars, z = make_pliv_multiway_cluster_CKMS2021(N, M, dim_x, return_type="array") -obj_dml_cluster_data = DoubleMLClusterData.from_arrays(x, y, d, cluster_vars) +obj_dml_cluster_data = DoubleMLData.from_arrays(x, y, d, cluster_vars=cluster_vars, is_cluster_data=True) x, y, d, cluster_vars, z = make_pliv_multiway_cluster_CKMS2021( N, @@ -32,7 +32,7 @@ omega_V=np.array([0.25, 0]), return_type="array", ) -obj_dml_oneway_cluster_data = DoubleMLClusterData.from_arrays(x, y, d, cluster_vars) +obj_dml_oneway_cluster_data = DoubleMLData.from_arrays(x, y, d, cluster_vars=cluster_vars, is_cluster_data=True) # only the first cluster variable is relevant with the weight setting above obj_dml_oneway_cluster_data.cluster_cols = "cluster_var1" @@ -188,15 +188,14 @@ def dml_plr_cluster_nonlinear_with_index(generate_data1, learner): # Set machine learning methods for m & l ml_l = clone(learner) - ml_m = clone(learner) - + ml_m = clone(learner) obj_dml_data = dml.DoubleMLData(data, "y", ["d"], x_cols) np.random.seed(3141) dml_plr_obj = DoubleMLPLRWithNonLinearScoreMixin(obj_dml_data, ml_l, ml_m, n_folds=n_folds) dml_plr_obj.fit() - + df = data.reset_index() - dml_cluster_data = dml.DoubleMLClusterData(df, y_col="y", d_cols="d", x_cols=x_cols, cluster_cols="index") + dml_cluster_data = dml.DoubleMLData(df, y_col="y", d_cols="d", x_cols=x_cols, cluster_cols="index", is_cluster_data=True) np.random.seed(3141) dml_plr_cluster_obj = DoubleMLPLRWithNonLinearScoreMixin(dml_cluster_data, ml_l, ml_m, n_folds=n_folds) dml_plr_cluster_obj.fit() diff --git a/doubleml/tests/test_sensitivity_cluster.py b/doubleml/tests/test_sensitivity_cluster.py index 83f8c270..a4b46e1a 100644 --- a/doubleml/tests/test_sensitivity_cluster.py +++ b/doubleml/tests/test_sensitivity_cluster.py @@ -17,7 +17,7 @@ (x, y, d, cluster_vars, z) = make_pliv_multiway_cluster_CKMS2021(N, M, dim_x, return_type="array") -obj_dml_cluster_data = dml.DoubleMLClusterData.from_arrays(x, y, d, cluster_vars) +obj_dml_cluster_data = dml.DoubleMLData.from_arrays(x, y, d, z=None, cluster_vars=cluster_vars, is_cluster_data=True) (x, y, d, cluster_vars, z) = make_pliv_multiway_cluster_CKMS2021( N, @@ -29,7 +29,7 @@ omega_V=np.array([0.25, 0]), return_type="array", ) -obj_dml_oneway_cluster_data = dml.DoubleMLClusterData.from_arrays(x, y, d, cluster_vars) +obj_dml_oneway_cluster_data = dml.DoubleMLData.from_arrays(x, y, d, z=None, cluster_vars=cluster_vars, is_cluster_data=True) # only the first cluster variable is relevant with the weight setting above obj_dml_oneway_cluster_data.cluster_cols = "cluster_var1" diff --git a/doubleml/utils/_aliases.py b/doubleml/utils/_aliases.py index e52a5818..679c80d3 100644 --- a/doubleml/utils/_aliases.py +++ b/doubleml/utils/_aliases.py @@ -1,12 +1,13 @@ import numpy as np import pandas as pd -from doubleml.data import DoubleMLClusterData, DoubleMLData +from doubleml.data import DoubleMLData _array_alias = ["array", "np.ndarray", "np.array", np.ndarray] _data_frame_alias = ["DataFrame", "pd.DataFrame", pd.DataFrame] _dml_data_alias = ["DoubleMLData", DoubleMLData] -_dml_cluster_data_alias = ["DoubleMLClusterData", DoubleMLClusterData] +# For backwards compatibility, DoubleMLClusterData is now an alias for DoubleMLData with is_cluster_data=True +_dml_cluster_data_alias = ["DoubleMLClusterData", "DoubleMLData"] def _get_array_alias(): From a2566cbb1d8138885091e2f7516919a5aef1d3d5 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Wed, 4 Jun 2025 23:01:11 +0200 Subject: [PATCH 21/84] upd --- 
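A minimal usage sketch of the compatibility path added below (the DataFrame construction
mirrors the updated tests, variable names are illustrative): both calls build an equivalent
cluster data-backend, the deprecated wrapper additionally emits a FutureWarning.

    import numpy as np
    import pandas as pd
    from doubleml.data import DoubleMLClusterData, DoubleMLData

    df = pd.DataFrame(
        np.tile(np.arange(6), (6, 1)),
        columns=["yy", "dd", "xx1", "xx2", "xx3", "cluster1"],
    )

    # deprecated path: warns and forwards to DoubleMLData with is_cluster_data=True
    dml_data_old = DoubleMLClusterData(df, y_col="yy", d_cols="dd", cluster_cols="cluster1")
    # preferred path
    dml_data_new = DoubleMLData(df, y_col="yy", d_cols="dd", cluster_cols="cluster1", is_cluster_data=True)
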
doubleml/data/__init__.py | 71 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/doubleml/data/__init__.py b/doubleml/data/__init__.py index dfe673e7..4c235a57 100644 --- a/doubleml/data/__init__.py +++ b/doubleml/data/__init__.py @@ -2,14 +2,85 @@ The :mod:`doubleml.data` module implements data classes for double machine learning. """ +import warnings + from .base_data import DoubleMLData from .did_data import DoubleMLDIDData from .panel_data import DoubleMLPanelData from .rdd_data import DoubleMLRDDData from .ssm_data import DoubleMLSSMData + +class DoubleMLClusterData(DoubleMLData): + """ + Backwards compatibility wrapper for DoubleMLData with is_cluster_data=True. + + This class is deprecated and will be removed in a future version. + Use DoubleMLData with is_cluster_data=True instead. + """ + + def __init__( + self, + data, + y_col, + d_cols, + cluster_cols, + x_cols=None, + z_cols=None, + t_col=None, + s_col=None, + use_other_treat_as_covariate=True, + force_all_x_finite=True, + ): + warnings.warn( + "DoubleMLClusterData is deprecated. " + "Use DoubleMLData with is_cluster_data=True instead.", + FutureWarning, + stacklevel=2, + ) + super().__init__( + data=data, + y_col=y_col, + d_cols=d_cols, + x_cols=x_cols, + z_cols=z_cols, + cluster_cols=cluster_cols, + use_other_treat_as_covariate=use_other_treat_as_covariate, + force_all_x_finite=force_all_x_finite, + force_all_d_finite=True, + is_cluster_data=True, + ) + + @classmethod + def from_arrays( + cls, x, y, d, cluster_vars, z=None, t=None, s=None, use_other_treat_as_covariate=True, force_all_x_finite=True + ): + """ + Initialize :class:`DoubleMLClusterData` from :class:`numpy.ndarray`'s. + This method is deprecated, use DoubleMLData.from_arrays with is_cluster_data=True instead. + """ + warnings.warn( + "DoubleMLClusterData is deprecated. " + "Use DoubleMLData.from_arrays with is_cluster_data=True instead.", + FutureWarning, + stacklevel=2, + ) + return DoubleMLData.from_arrays( + x=x, + y=y, + d=d, + z=z, + cluster_vars=cluster_vars, + use_other_treat_as_covariate=use_other_treat_as_covariate, + force_all_x_finite=force_all_x_finite, + force_all_d_finite=True, + is_cluster_data=True, + ) + + __all__ = [ "DoubleMLData", + "DoubleMLClusterData", "DoubleMLDIDData", "DoubleMLPanelData", "DoubleMLRDDData", From 9ef4e53f975c5feb577aeab59ce67f63530640cd Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Thu, 5 Jun 2025 06:57:05 +0200 Subject: [PATCH 22/84] update lambda and p calculation in did_cs --- doubleml/did/did_cs.py | 8 ++------ doubleml/did/tests/_utils_did_cs_manual.py | 8 ++++---- doubleml/did/tests/_utils_did_manual.py | 2 +- 3 files changed, 7 insertions(+), 11 deletions(-) diff --git a/doubleml/did/did_cs.py b/doubleml/did/did_cs.py index ab2af5b9..5984399c 100644 --- a/doubleml/did/did_cs.py +++ b/doubleml/did/did_cs.py @@ -219,14 +219,10 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa # THIS DIFFERS FROM THE PAPER due to stratified splitting this should be the same for each fold # nuisance estimates of the uncond. treatment prob. - p_hat = np.full_like(d, np.nan, dtype="float64") - for train_index, test_index in smpls: - p_hat[test_index] = np.mean(d[train_index]) + p_hat = np.full_like(d, d.mean(), dtype="float64") # nuisance estimates of the uncond. time prob. 
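+        # as for p_hat above, the per-fold training-sample mean is replaced by the full-sample
+        # mean, since the stratified splitting keeps this probability (nearly) constant across folds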
- lambda_hat = np.full_like(t, np.nan, dtype="float64") - for train_index, test_index in smpls: - lambda_hat[test_index] = np.mean(t[train_index]) + lambda_hat = np.full_like(t, t.mean(), dtype="float64") # nuisance g smpls_d0_t0, smpls_d0_t1, smpls_d1_t0, smpls_d1_t1 = _get_cond_smpls_2d(smpls, d, t) diff --git a/doubleml/did/tests/_utils_did_cs_manual.py b/doubleml/did/tests/_utils_did_cs_manual.py index f14a52a0..ce6f8870 100644 --- a/doubleml/did/tests/_utils_did_cs_manual.py +++ b/doubleml/did/tests/_utils_did_cs_manual.py @@ -178,12 +178,12 @@ def fit_nuisance_did_cs( m_hat_list.append(np.zeros_like(g_hat_d1_t1_list[idx], dtype="float64")) p_hat_list = [] - for train_index, _ in smpls: - p_hat_list.append(np.mean(d[train_index])) + for _ in smpls: + p_hat_list.append(np.mean(d)) lambda_hat_list = [] - for train_index, _ in smpls: - lambda_hat_list.append(np.mean(t[train_index])) + for _ in smpls: + lambda_hat_list.append(np.mean(t)) return g_hat_d0_t0_list, g_hat_d0_t1_list, g_hat_d1_t0_list, g_hat_d1_t1_list, m_hat_list, p_hat_list, lambda_hat_list diff --git a/doubleml/did/tests/_utils_did_manual.py b/doubleml/did/tests/_utils_did_manual.py index e314c301..b067e44d 100644 --- a/doubleml/did/tests/_utils_did_manual.py +++ b/doubleml/did/tests/_utils_did_manual.py @@ -104,7 +104,7 @@ def fit_nuisance_did( m_hat_list = fit_predict_proba(d, x, ml_m, m_params, smpls, trimming_threshold=trimming_threshold) p_hat_list = [] - for train_index, _ in smpls: + for _ in smpls: p_hat_list.append(np.mean(d)) return g_hat0_list, g_hat1_list, m_hat_list, p_hat_list From e90441b9366f2c46daecd01809575635a56faeb8 Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Thu, 5 Jun 2025 11:18:11 +0200 Subject: [PATCH 23/84] add _score_dim property to doubleml class --- doubleml/double_ml.py | 57 +++++++++++++++---------------------------- 1 file changed, 20 insertions(+), 37 deletions(-) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index 911487a3..c2d3727b 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -103,16 +103,7 @@ def __init__(self, obj_dml_data, n_folds, n_rep, score, draw_sample_splitting): self._score_dim = (self._dml_data.n_obs, self.n_rep, self._dml_data.n_coefs) # initialize arrays according to obj_dml_data and the resampling settings - ( - self._psi, - self._psi_deriv, - self._psi_elements, - self._var_scaling_factors, - self._coef, - self._se, - self._all_coef, - self._all_se, - ) = self._initialize_arrays() + self._initialize_arrays() # initialize instance attributes which are later used for iterating self._i_rep = None @@ -1075,22 +1066,20 @@ def _fit_sensitivity_elements(self, nuisance_predictions): def _initialize_arrays(self): # scores - psi = np.full(self._score_dim, np.nan) - psi_deriv = np.full(self._score_dim, np.nan) - psi_elements = self._initialize_score_elements(self._score_dim) + self._psi = np.full(self._score_dim, np.nan) + self._psi_deriv = np.full(self._score_dim, np.nan) + self._psi_elements = self._initialize_score_elements(self._score_dim) n_rep = self._score_dim[1] n_thetas = self._score_dim[2] - var_scaling_factors = np.full(n_thetas, np.nan) + self._var_scaling_factors = np.full(n_thetas, np.nan) # coefficients and ses - coef = np.full(n_thetas, np.nan) - se = np.full(n_thetas, np.nan) + self._coef = np.full(n_thetas, np.nan) + self._se = np.full(n_thetas, np.nan) - all_coef = np.full((n_thetas, n_rep), np.nan) - all_se = np.full((n_thetas, n_rep), np.nan) - - return psi, psi_deriv, psi_elements, var_scaling_factors, coef, se, all_coef, 
all_se + self._all_coef = np.full((n_thetas, n_rep), np.nan) + self._all_se = np.full((n_thetas, n_rep), np.nan) def _initialize_predictions_and_targets(self): self._predictions = {learner: np.full(self._score_dim, np.nan) for learner in self.params_names} @@ -1211,7 +1200,7 @@ def evaluate_learners(self, learners=None, metric=_rmse): f"The learners have to be a subset of {str(self.params_names)}. Learners {str(learners)} provided." ) - def draw_sample_splitting(self): + def draw_sample_splitting(self, n_obs=None): """ Draw sample splitting for DoubleML models. @@ -1221,26 +1210,27 @@ def draw_sample_splitting(self): Parameters ---------- n_obs : int or None - The number of observations. If ``None``, the number of observations is set to the number of observations in - the data set. + The number of observations to resample. If ``None``, the number of observations is set to the number + of observations in the data set. Returns ------- self : object """ + if n_obs is None: + n_obs = self.n_obs + if self._is_cluster_data: obj_dml_resampling = DoubleMLClusterResampling( n_folds=self._n_folds_per_cluster, n_rep=self.n_rep, - n_obs=self.n_obs, + n_obs=n_obs, n_cluster_vars=self._dml_data.n_cluster_vars, cluster_vars=self._dml_data.cluster_vars, ) self._smpls, self._smpls_cluster = obj_dml_resampling.split_samples() else: - obj_dml_resampling = DoubleMLResampling( - n_folds=self.n_folds, n_rep=self.n_rep, n_obs=self.n_obs, stratify=self._strata - ) + obj_dml_resampling = DoubleMLResampling(n_folds=self.n_folds, n_rep=self.n_rep, n_obs=n_obs, stratify=self._strata) self._smpls = obj_dml_resampling.split_samples() return self @@ -1309,16 +1299,9 @@ def set_sample_splitting(self, all_smpls, all_smpls_cluster=None): all_smpls, all_smpls_cluster, self._dml_data, self._is_cluster_data, n_obs=self.n_obs ) - ( - self._psi, - self._psi_deriv, - self._psi_elements, - self._var_scaling_factors, - self._coef, - self._se, - self._all_coef, - self._all_se, - ) = self._initialize_arrays() + # set sample splitting can update the number of repetitions + self._score_dim = (self._score_dim[0], self._n_rep, self._score_dim[2]) + self._initialize_arrays() self._initialize_ml_nuisance_params() return self From eb19efef0278c54831bc5b71567b96cf2f90e7da Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Thu, 5 Jun 2025 14:04:14 +0200 Subject: [PATCH 24/84] upd 305 --- doubleml/__init__.py | 6 +- doubleml/data/__init__.py | 29 +- doubleml/data/base_data_content.txt | Bin 60862 -> 0 bytes doubleml/data/cluster_data.py | 285 ------------------ doubleml/data/did_data.py | 69 +++-- doubleml/data/panel_data.py | 89 ++++-- doubleml/data/rdd_data.py | 82 ++--- doubleml/data/ssm_data.py | 13 +- doubleml/data/tests/test_cluster_data.py | 127 ++++---- doubleml/data/tests/test_dml_data.py | 14 +- doubleml/did/datasets/dgp_did_SZ2020.py | 25 +- .../dgp_pliv_multiway_cluster_CKMS2021.py | 8 +- doubleml/tests/test_exceptions_fixed.py | 0 doubleml/tests/test_multiway_cluster.py | 9 +- doubleml/tests/test_nonlinear_cluster.py | 13 +- doubleml/tests/test_return_types_fixed.py | 0 doubleml/tests/test_sensitivity_cluster.py | 4 +- doubleml/utils/_aliases.py | 36 ++- doubleml/utils/_check_return_types_fixed.py | 0 19 files changed, 313 insertions(+), 496 deletions(-) delete mode 100644 doubleml/data/base_data_content.txt delete mode 100644 doubleml/data/cluster_data.py create mode 100644 doubleml/tests/test_exceptions_fixed.py create mode 100644 doubleml/tests/test_return_types_fixed.py create mode 100644 
doubleml/utils/_check_return_types_fixed.py diff --git a/doubleml/__init__.py b/doubleml/__init__.py index 102ea995..6cf7de96 100644 --- a/doubleml/__init__.py +++ b/doubleml/__init__.py @@ -1,6 +1,6 @@ import importlib.metadata -from .data import DoubleMLClusterData, DoubleMLData +from .data import DoubleMLClusterData, DoubleMLData, DoubleMLDIDData, DoubleMLPanelData, DoubleMLRDDData, DoubleMLSSMData from .did.did import DoubleMLDID from .did.did_cs import DoubleMLDIDCS from .double_ml_framework import DoubleMLFramework, concat @@ -29,6 +29,10 @@ "DoubleMLIIVM", "DoubleMLData", "DoubleMLClusterData", + "DoubleMLDIDData", + "DoubleMLPanelData", + "DoubleMLRDDData", + "DoubleMLSSMData", "DoubleMLDID", "DoubleMLDIDCS", "DoubleMLPQ", diff --git a/doubleml/data/__init__.py b/doubleml/data/__init__.py index 4c235a57..7d368b76 100644 --- a/doubleml/data/__init__.py +++ b/doubleml/data/__init__.py @@ -2,6 +2,7 @@ The :mod:`doubleml.data` module implements data classes for double machine learning. """ +from .base_data import DoubleMLData import warnings from .base_data import DoubleMLData @@ -14,11 +15,10 @@ class DoubleMLClusterData(DoubleMLData): """ Backwards compatibility wrapper for DoubleMLData with is_cluster_data=True. - This class is deprecated and will be removed in a future version. Use DoubleMLData with is_cluster_data=True instead. """ - + def __init__( self, data, @@ -33,15 +33,14 @@ def __init__( force_all_x_finite=True, ): warnings.warn( - "DoubleMLClusterData is deprecated. " - "Use DoubleMLData with is_cluster_data=True instead.", + "DoubleMLClusterData is deprecated. " "Use DoubleMLData with is_cluster_data=True instead.", FutureWarning, stacklevel=2, ) super().__init__( data=data, y_col=y_col, - d_cols=d_cols, + d_cols=d_cols, x_cols=x_cols, z_cols=z_cols, cluster_cols=cluster_cols, @@ -50,7 +49,7 @@ def __init__( force_all_d_finite=True, is_cluster_data=True, ) - + @classmethod def from_arrays( cls, x, y, d, cluster_vars, z=None, t=None, s=None, use_other_treat_as_covariate=True, force_all_x_finite=True @@ -60,15 +59,14 @@ def from_arrays( This method is deprecated, use DoubleMLData.from_arrays with is_cluster_data=True instead. """ warnings.warn( - "DoubleMLClusterData is deprecated. " - "Use DoubleMLData.from_arrays with is_cluster_data=True instead.", + "DoubleMLClusterData is deprecated. 
" "Use DoubleMLData.from_arrays with is_cluster_data=True instead.", FutureWarning, stacklevel=2, ) return DoubleMLData.from_arrays( - x=x, - y=y, - d=d, + x=x, + y=y, + d=d, z=z, cluster_vars=cluster_vars, use_other_treat_as_covariate=use_other_treat_as_covariate, @@ -78,11 +76,4 @@ def from_arrays( ) -__all__ = [ - "DoubleMLData", - "DoubleMLClusterData", - "DoubleMLDIDData", - "DoubleMLPanelData", - "DoubleMLRDDData", - "DoubleMLSSMData", -] +__all__ = ["DoubleMLData", "DoubleMLClusterData", "DoubleMLDIDData", "DoubleMLPanelData", "DoubleMLRDDData", "DoubleMLSSMData"] diff --git a/doubleml/data/base_data_content.txt b/doubleml/data/base_data_content.txt deleted file mode 100644 index 1ccdf7ca9a7ea2f43ee36364c7123001ff431e8c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 60862 zcmeI5|8EsXlECNZlkR_bXMVtF7boofvdW1Q93*>4$tJoW6p4!*1BS2xlf#ezyZP7e zu0B&Ps=BAUr{DNJFvPN8?DwXt>w8sK*Yy0~|2`g`4nGgihUdeJVKqFK-~M^{r{SUe z{Y<`D4F|)$;r{qdyzlz(AH$8|k0;+B%h=~~=Y6?89S-IHA7%7|5faeSIgW;xr?`MH{Bv1|p?$Q5$K7s1^9S=VZPh`#m!Qg1v6WA|gz9-|DbY37F%bifV zFZVx@pM9b9v5b2xpB-kD*CFo>zl`G_kJvHaGl2p3B9HD17Y>CdKaI0JA35}TbTmAY-+w$A@k~C0Z%6Vi(%K)c1P(ht9>}*ZMP@*FD*s20?8&IZ zaUL)|5a?yh;SRidDf1l4m_Nx(d}cHtEBkj}=Fq!W!_6@FSHqX0BYblxpEK9Nusf*U zTpB(b$FGEE{OpWBcK1p&s|<5BVh#6@*`bVd$@9r0`2>AouFJv$H1a384=p-AnZ@w7 zxgW^)4`tlG{Fo;2`|gPO6ZsA;H>f%02IcVx<4EXND?*+}0s%~p1dBs~XE1<=9`C*7 z%NL_=U^zATHV;;Yra=4c;kCdy96lfAf==uT7CUd}fm<&}99F^$Mm!s_tj)a=Y!%|3 z-23-&Brpx{OT%w(Vc4DVKKm?=&@sOX*4Qu>tu_etJ3=keFxu>z^3rDC6E3<99Lsq4 zurCy1z5gTE!%V9To?RpF?g%E>Q_8)a;g&#i+pf#$Gr{_X;Dfay5oC>)yc8Or$lv$n zQ>=NFV2!6c5UF5;b^4JmdWW?d^{e6P2w!!(rNiorQw^?Zl|kPZeDE#!gy*6II$H^k z{7(MH_wQC@c~9VCU0{!|*&Dtc?}uZLgxg_Op@jz4z_5MAv3tYcWL{z|GtdA0w{UHziQOLr($2trmpgvIQuS~p)Ter5^sYMKTpnkf$IRrP z>t4xs*!)W=a!-uDsQBd7eD_+GMBVj?-->qO@*&Koq3 z?=|W*D=A6m_L$juRhHgI?NbohSaA1(+^weIeO( zZehsx*{EUUHk$pCMUZt99mu`;pA3|hnL{T#*Kb-Y2%sLy5BWQq1%zGs`*@`AzI>uO zPtF3qa9lMn>e&VPRI}kH^6g_8i&bzZmPjlkCnJ|DYdP5*xL%U)jZ4sqhTDvq(I_8* z%AI5+`f$MRTn5{+J)fZ^<m5H6TIdDz*hDQLma^1wDh{Y{< z#R`bkYLCQhtz{mOYs=DZ3LZ#vS3HAP5K61~Jc6kAj-yVqPFFcZef})q{n_xt1V6+4 z2eBXgtYyd8B9=?8r0ZX76pK@@@pyP6+{7lWdeWLTR-;NQ_aBCfA{XqN|9=(88a4Ir zoCdFf?TwxQOGXo33w^+ua^V~KR;`^lir-1CzLcFtdMskDMz5daGf!QFWx7f1z-t-o zDb}w~w3hsVXrmh6f`#tA7&RSho%-YH9MqjEff@EGz z<|ivAZmDMn_Byygi|1O%+VsKrZr59)xhv6_AI3bK8Tp-AH7ZWI2Zwe~xTV!ZKG7V7 zu_j4=C))iwW(QQa+F*N~es z?xASIOTm!rMl(I;GacpksZB*{)u!zJWjc-L?c;Z}GShRr_EKZ_*Z92⁣89z1ajE zC7p<$sa~)jVAdG(DXrqGmbpf1RT3{r^^6QZLNw0Z8l#feF|jFp0kOyI9+_IUJVkSV$fOVV0F--6h4$s_}y#7o>}7UScw>>FJ#V@e1et!B5?5U8fnSt@$`I${=mNo zk%|>Klx3&27q{ZJ9+l{1M3mt)V^>+Wti0|?qeV}1vyvYr*G)ZFj5X%1K3qRq$|XFf zNwe$+qZO4c@!Yc#0r4c~p4H>|wmd5ylRA+5Oy;zjI==0Uuh-Iu?LKOK z3qnHgPUBYT)f^SLE2g5BzvlkBrtUn)F1$ zo7kxpYnN0!Zwjr{Lf3=3Cp>y0aQ9?B9gnTO7-MESpY4!P!+#`iM&Lob%QV+^)^X;z@x zMRZlNmFb)pCD$v>tCd?;OGwC9O~f_g|I_g^!>uMUdXKbY>!M(Bam*?&%4fA%*WspR z%<$8ywIxb+MQ_Qbwbzf_c60pnnQIjF@X?_tW;Jf^1>XA@b{}#IBOiP>H-3sIM0(Rc zyLgxR3@k@`?@Hey0qpaSi6+@tuP2tjk9EQF9kHT&^=+lBd~d9j&E1=>w7vc}H*RU2 z=jz+FVHx*QCYi6um%zxg(K#6Hx=GsI74;$Ci82ky_1QX0%^R_M+Sl#LY;i`L!Sr)a ztlZ?Nw&Z`e$3-pFk@1uE*r;>Q~S0JGun&7iJ!E`wq>u-+V`~KBONN1 z+K0_b@mO{US#fKpwZ-UKkDp6=sTC>>OGme<>#<;+zvs^N=gKX;K39Hcd#-({0U#lw z4i@FvD_JxBmM)i6!u4_U63%DM_pkcrO-J_3ta`XVQu{_CY?>vR-_hEQ(_XVzGF&*t zP8!uF+r3Hq?hUG49WFhB`amalrg*@v)vOa$hza+6B!Y;K3$jOyYQ@2 zsP$Ogh*}0a+*&EqI##Por>#v!3ts7gVD5BApR4^+N>zjxGZ(F(oOf406dU~TyQ?i9 zIPa?V-v6li<`?Yx`{}ASis44&i2DLtgXCT5o9eD^KTkmK_198KZ*|I00p=+lT}j1E zwbgrVlrsZ$=(+w#-RI8dihi|swi^-wyr!#tFFD+pSN6iaB~eiQw)c9}_UrTcVqV>v 
z!dGLQzXDgZ6TGCM-^d;8-!eu|yDY!UA%9yif0y7*u`oXC``ERvR~l(rb^V01ysM&* zJCXmK`JyLb>*-6X6WqZW-|L^fq!!%XC92bm=Ji!FV<*wlZa(&v>bOjIj?!}%%KhMV zc>U)*wqPtj_puyVR9^KN>;2ArV`fdI<9GS1>rJz@90?_-T%>P30JeEA| z6ZzrnD=6ngOns94seF1X)=bDj#$c(aKaZ#wUaytntpm>%*TQ_d>88f4W&0bF8bT7DyXSsFc@LWg2~`(1D< zmH$!><(MzwklPDq5#*X!%W3OYQGUKW&hlONEIF63g=i0!C9ijuh~ZMplJSFhI$cFS z6knBQ#`tRTRyzyxmE6ranPB4{9S_bq7Mvx>_k5%039||?dn;$Aab;+$QkRUKMXbG< z@m|~Qyjxx0)K(yKv((1T&zP>UEpI+5d%-18{h2je#n?6XsWpkl9{c`YW8tm`rL+J<$ylExe3$>M3G(~5=e`Z~Sr0ekLEHqKrF{+@~jIkc{6xG&Pq z-*aml!_#Lo+10okWoP%Ep1URan{4dL%J3{mV49PZWau)995b1;=q=mV1(9=WmFO$N zG@J2wu5mnSi0<0CE=NpFtF*fHqiCIG7O_qUM!x#=6{o!;RV`SvnD$Du#1AM=v9*(j zQ#{qG()H(AT4APsEaGnVQ?s>5z1{n6Y;C9U-UjzRx*q@Jq(Y$YpS5VLF~q6^eKxZz z9*ce#_uu@@n`TY&D(k5aEbk$h=dkOJWG+CAeAOAKBi#@kr}E4mEcx_G{Is5CLB-`~ z!6B{ou0~i(YeLM6cP?qZ;X93u>*k_*CV*LZO?Rf=|KrOhO1ZxK#x&cB_fA(HC5hqP zv|8<_9mLF<_mZMNkH!pu>cZWo_cShZGxuR|Ky_Da9YWlXD z*j_qsj|IoMb1UwdJ=$eDd&%OJA5a#@wJ9xE4YVqxHN6+|7H1&q8&(g;-&JxhS&NP9 zNWER{F$J3>Nq7jJarT3B=}U zj-=Y6bLv=R7iUpfmh8u{O57?VvrnHuByEX3j+7YtE$uGv;3079vl|Kj1U$u7| z=}w8wPJFyK{7s@L{Y19zq~c~UbZti6!ZN&+5~45B`rxF?$F#(*^**L_w;|e6n`3Yy za^srd+T!qh2Y!3JO9&sPJ;v;b*W{JfDu8WXj}`cR85z$D0_LWkGo!lKre)fNb6YZ{ QZ9l&~hZ8Z|`HF1+ANk8VaR2}S diff --git a/doubleml/data/cluster_data.py b/doubleml/data/cluster_data.py deleted file mode 100644 index 290c61f5..00000000 --- a/doubleml/data/cluster_data.py +++ /dev/null @@ -1,285 +0,0 @@ -import io - -import numpy as np -import pandas as pd -from sklearn.utils import assert_all_finite -from sklearn.utils.validation import check_array - -from doubleml.data.base_data import DoubleMLBaseData, DoubleMLData -from doubleml.utils._estimation import _assure_2d_array - - -class DoubleMLClusterData(DoubleMLData): - """Double machine learning data-backend for data with cluster variables. - - :class:`DoubleMLClusterData` objects can be initialized from - :class:`pandas.DataFrame`'s as well as :class:`numpy.ndarray`'s. - - Parameters - ---------- - data : :class:`pandas.DataFrame` - The data. - - y_col : str - The outcome variable. - - d_cols : str or list - The treatment variable(s). - - cluster_cols : str or list - The cluster variable(s). - - x_cols : None, str or list - The covariates. - If ``None``, all variables (columns of ``data``) which are neither specified as outcome variable ``y_col``, nor - treatment variables ``d_cols``, nor instrumental variables ``z_cols`` are used as covariates. - Default is ``None``. - - z_cols : None, str or list - The instrumental variable(s). - Default is ``None``. - - t_col : None or str - The time variable (only relevant/used for DiD Estimators). - Default is ``None``. - - s_col : None or str - The score or selection variable (only relevant/used for RDD and SSM Estimatiors). - Default is ``None``. - - use_other_treat_as_covariate : bool - Indicates whether in the multiple-treatment case the other treatment variables should be added as covariates. - Default is ``True``. - - force_all_x_finite : bool or str - Indicates whether to raise an error on infinite values and / or missings in the covariates ``x``. - Possible values are: ``True`` (neither missings ``np.nan``, ``pd.NA`` nor infinite values ``np.inf`` are - allowed), ``False`` (missings and infinite values are allowed), ``'allow-nan'`` (only missings are allowed). - Note that the choice ``False`` and ``'allow-nan'`` are only reasonable if the machine learning methods used - for the nuisance functions are capable to provide valid predictions with missings and / or infinite values - in the covariates ``x``. 
- Default is ``True``. - - Examples - -------- >>> from doubleml import DoubleMLClusterData - >>> from doubleml.plm.datasets import make_pliv_multiway_cluster_CKMS2021 - >>> # initialization from pandas.DataFrame - >>> df = make_pliv_multiway_cluster_CKMS2021(return_type='DataFrame') - >>> obj_dml_data_from_df = DoubleMLClusterData(df, 'Y', 'D', ['cluster_var_i', 'cluster_var_j'], z_cols='Z') - >>> # initialization from np.ndarray - >>> (x, y, d, cluster_vars, z) = make_pliv_multiway_cluster_CKMS2021(return_type='array') - >>> obj_dml_data_from_array = DoubleMLClusterData.from_arrays(x, y, d, cluster_vars, z) - """ - - def __init__( - self, - data, - y_col, - d_cols, - cluster_cols, - x_cols=None, - z_cols=None, - t_col=None, - s_col=None, - use_other_treat_as_covariate=True, - force_all_x_finite=True, - ): - DoubleMLBaseData.__init__(self, data) # we need to set cluster_cols (needs _data) before call to the super __init__ because of the x_cols setter - self.cluster_cols = cluster_cols - self._set_cluster_vars() - DoubleMLData.__init__( - self, data, y_col, d_cols, x_cols, z_cols, t_col, s_col, use_other_treat_as_covariate, force_all_x_finite, is_cluster_data=True - ) - self._check_disjoint_sets_cluster_cols() - - def __str__(self): - data_summary = self._data_summary_str() - buf = io.StringIO() - self.data.info(verbose=False, buf=buf) - df_info = buf.getvalue() - res = ( - "================== DoubleMLClusterData Object ==================\n" - + "\n------------------ Data summary ------------------\n" - + data_summary - + "\n------------------ DataFrame info ------------------\n" - + df_info - ) - return res - - def _data_summary_str(self): - data_summary = ( - f"Outcome variable: {self.y_col}\n" - f"Treatment variable(s): {self.d_cols}\n" - f"Cluster variable(s): {self.cluster_cols}\n" - f"Covariates: {self.x_cols}\n" - f"Instrument variable(s): {self.z_cols}\n" - ) - if self.t_col is not None: - data_summary += f"Time variable: {self.t_col}\n" - if self.s_col is not None: - data_summary += f"Score/Selection variable: {self.s_col}\n" - - data_summary += f"No. Observations: {self.n_obs}\n" - return data_summary - - @classmethod - def from_arrays( - cls, x, y, d, cluster_vars, z=None, t=None, s=None, use_other_treat_as_covariate=True, force_all_x_finite=True - ): - """ - Initialize :class:`DoubleMLClusterData` from :class:`numpy.ndarray`'s. - - Parameters - ---------- - x : :class:`numpy.ndarray` - Array of covariates. - - y : :class:`numpy.ndarray` - Array of the outcome variable. - - d : :class:`numpy.ndarray` - Array of treatment variables. - - cluster_vars : :class:`numpy.ndarray` - Array of cluster variables. - - z : None or :class:`numpy.ndarray` - Array of instrumental variables. - Default is ``None``. - - t : :class:`numpy.ndarray` - Array of the time variable (only relevant/used for DiD models). - Default is ``None``. - - s : :class:`numpy.ndarray` - Array of the score or selection variable (only relevant/used for RDD or SSM models). - Default is ``None``. - - use_other_treat_as_covariate : bool - Indicates whether in the multiple-treatment case the other treatment variables should be added as covariates. - Default is ``True``. - - force_all_x_finite : bool or str - Indicates whether to raise an error on infinite values and / or missings in the covariates ``x``. - Possible values are: ``True`` (neither missings ``np.nan``, ``pd.NA`` nor infinite values ``np.inf`` are - allowed), ``False`` (missings and infinite values are allowed), ``'allow-nan'`` (only missings are allowed). 
- Note that the choice ``False`` and ``'allow-nan'`` are only reasonable if the machine learning methods used - for the nuisance functions are capable to provide valid predictions with missings and / or infinite values - in the covariates ``x``. - Default is ``True``. - - Examples - -------- >>> from doubleml import DoubleMLClusterData - >>> from doubleml.plm.datasets import make_pliv_multiway_cluster_CKMS2021 - >>> (x, y, d, cluster_vars, z) = make_pliv_multiway_cluster_CKMS2021(return_type='array') - >>> obj_dml_data_from_array = DoubleMLClusterData.from_arrays(x, y, d, cluster_vars, z) - """ - dml_data = DoubleMLData.from_arrays(x, y, d, z, t, s, use_other_treat_as_covariate, force_all_x_finite, is_cluster_data=True) - cluster_vars = check_array(cluster_vars, ensure_2d=False, allow_nd=False) - cluster_vars = _assure_2d_array(cluster_vars) - if cluster_vars.shape[1] == 1: - cluster_cols = ["cluster_var"] - else: - cluster_cols = [f"cluster_var{i + 1}" for i in np.arange(cluster_vars.shape[1])] - - data = pd.concat((pd.DataFrame(cluster_vars, columns=cluster_cols), dml_data.data), axis=1) - - return cls( - data, - dml_data.y_col, - dml_data.d_cols, - cluster_cols, - dml_data.x_cols, - dml_data.z_cols, - dml_data.t_col, - dml_data.s_col, - dml_data.use_other_treat_as_covariate, - dml_data.force_all_x_finite, - ) - - @property - def cluster_cols(self): - """ - The cluster variable(s). - """ - return self._cluster_cols - - @cluster_cols.setter - def cluster_cols(self, value): - reset_value = hasattr(self, "_cluster_cols") - if isinstance(value, str): - value = [value] - if not isinstance(value, list): - raise TypeError( - "The cluster variable(s) cluster_cols must be of str or list type. " - f"{str(value)} of type {str(type(value))} was passed." - ) - if not len(set(value)) == len(value): - raise ValueError("Invalid cluster variable(s) cluster_cols: Contains duplicate values.") - if not set(value).issubset(set(self.all_variables)): - raise ValueError("Invalid cluster variable(s) cluster_cols. At least one cluster variable is no data column.") - self._cluster_cols = value - if reset_value: - self._check_disjoint_sets() - self._set_cluster_vars() - - @property - def n_cluster_vars(self): - """ - The number of cluster variables. - """ - return len(self.cluster_cols) - - @property - def cluster_vars(self): - """ - Array of cluster variable(s). 
- """ - return self._cluster_vars.values - - def _get_optional_col_sets(self): - base_optional_col_sets = super()._get_optional_col_sets() - cluster_cols_set = set(self.cluster_cols) - return [cluster_cols_set] + base_optional_col_sets - - def _check_disjoint_sets(self): - # apply the standard checks from the DoubleMLData class - super(DoubleMLClusterData, self)._check_disjoint_sets() - self._check_disjoint_sets_cluster_cols() - - def _check_disjoint_sets_cluster_cols(self): - # apply the standard checks from the DoubleMLData class - super(DoubleMLClusterData, self)._check_disjoint_sets() - - # special checks for the additional cluster variables - cluster_cols_set = set(self.cluster_cols) - y_col_set = {self.y_col} - x_cols_set = set(self.x_cols) - d_cols_set = set(self.d_cols) - - z_cols_set = set(self.z_cols or []) - t_col_set = {self.t_col} if self.t_col else set() - s_col_set = {self.s_col} if self.s_col else set() - - # TODO: X can not be used as cluster variable - cluster_checks_args = [ - (y_col_set, "outcome variable", "``y_col``"), - (d_cols_set, "treatment variable", "``d_cols``"), - (x_cols_set, "covariate", "``x_cols``"), - (z_cols_set, "instrumental variable", "``z_cols``"), - (t_col_set, "time variable", "``t_col``"), - (s_col_set, "score or selection variable", "``s_col``"), - ] - for set1, name, argument in cluster_checks_args: - self._check_disjoint( - set1=set1, - name1=name, - arg1=argument, - set2=cluster_cols_set, - name2="cluster variable(s)", - arg2="``cluster_cols``", - ) - - def _set_cluster_vars(self): - assert_all_finite(self.data.loc[:, self.cluster_cols]) - self._cluster_vars = self.data.loc[:, self.cluster_cols] diff --git a/doubleml/data/did_data.py b/doubleml/data/did_data.py index 150aeb7d..b528ead8 100644 --- a/doubleml/data/did_data.py +++ b/doubleml/data/did_data.py @@ -1,5 +1,4 @@ import io -import numpy as np import pandas as pd from sklearn.utils.validation import check_array @@ -30,7 +29,8 @@ class DoubleMLDIDData(DoubleMLData): x_cols : None, str or list The covariates. If ``None``, all variables (columns of ``data``) which are neither specified as outcome variable ``y_col``, nor - treatment variables ``d_cols``, nor instrumental variables ``z_cols``, nor time variable ``t_col`` are used as covariates. + treatment variables ``d_cols``, nor instrumental variables ``z_cols``, nor time variable ``t_col`` + are used as covariates. Default is ``None``. z_cols : None, str or list @@ -56,10 +56,9 @@ class DoubleMLDIDData(DoubleMLData): force_all_d_finite : bool Indicates whether to raise an error on infinite values and / or missings in the treatment variables ``d``. - Default is ``True``. - - Examples - -------- >>> from doubleml import DoubleMLDIDData + Default is ``True``. 
Examples + -------- + >>> from doubleml import DoubleMLDIDData >>> from doubleml.did.datasets import make_did_SZ2020 >>> # initialization from pandas.DataFrame >>> df = make_did_SZ2020(return_type='DataFrame') @@ -74,18 +73,18 @@ def __init__( data, y_col, d_cols, - t_col, x_cols=None, z_cols=None, + t_col=None, cluster_cols=None, use_other_treat_as_covariate=True, force_all_x_finite=True, force_all_d_finite=True, ): - # Set time column before calling parent constructor - self.t_col = t_col + # Initialize _t_col to None first to avoid AttributeError during parent init + self._t_col = None - # Call parent constructor + # Call parent constructor first to set _data super().__init__( data=data, y_col=y_col, @@ -97,7 +96,10 @@ def __init__( force_all_x_finite=force_all_x_finite, force_all_d_finite=force_all_d_finite, ) - + + # Set time column after parent constructor (which sets _data) + self.t_col = t_col + # Set time variable array after data is loaded self._set_time_var() @@ -168,15 +170,15 @@ def from_arrays( if t.shape[1] != 1: raise ValueError("t must be a single column.") t_col = "t" - + # Create base data using parent class method base_data = DoubleMLData.from_arrays( x, y, d, z, cluster_vars, use_other_treat_as_covariate, force_all_x_finite, force_all_d_finite ) - + # Add time variable to the DataFrame data = pd.concat((base_data.data, pd.DataFrame(t, columns=[t_col])), axis=1) - + return cls( data, base_data.y_col, @@ -201,15 +203,14 @@ def t_col(self): def t_col(self, value): if not isinstance(value, str): raise TypeError( - "The time variable t_col must be of str type. " - f"{str(value)} of type {str(type(value))} was passed." + "The time variable t_col must be of str type. " f"{str(value)} of type {str(type(value))} was passed." ) # Check if data exists (during initialization it might not) - if hasattr(self, '_data') and value not in self.all_variables: + if hasattr(self, "_data") and value not in self.all_variables: raise ValueError("Invalid time variable t_col. The time variable is no data column.") self._t_col = value # Update time variable array if data is already loaded - if hasattr(self, '_data'): + if hasattr(self, "_data"): self._set_time_var() @property @@ -217,13 +218,37 @@ def t(self): """ Array of time variable. """ - return self._t.values + if self.t_col is not None: + return self._t.values + else: + return None + + @t_col.setter + def t_col(self, value): + reset_value = hasattr(self, "_t_col") + if value is not None: + if not isinstance(value, str): + raise TypeError( + "The time variable t_col must be of str type (or None). " + f"{str(value)} of type {str(type(value))} was passed." + ) + if value not in self.all_variables: + raise ValueError(f"Invalid time variable t_col. 
{value} is no data column.") + self._t_col = value + else: + self._t_col = None + if reset_value: + self._check_disjoint_sets() + self._set_y_z_t_s() + def _get_optional_col_sets(self): """Get optional column sets including time column.""" base_optional_col_sets = super()._get_optional_col_sets() - t_col_set = {self.t_col} - return [t_col_set] + base_optional_col_sets + if self.t_col is not None: + t_col_set = {self.t_col} + return [t_col_set] + base_optional_col_sets + return base_optional_col_sets def _check_disjoint_sets(self): """Check that time column doesn't overlap with other variables.""" @@ -259,7 +284,7 @@ def _check_disjoint_sets_t_col(self): def _set_time_var(self): """Set the time variable array.""" - if hasattr(self, '_data') and self.t_col in self.data.columns: + if hasattr(self, "_data") and self.t_col in self.data.columns: self._t = self.data.loc[:, [self.t_col]] def __str__(self): diff --git a/doubleml/data/panel_data.py b/doubleml/data/panel_data.py index f34b2ee1..c1ec3bb5 100644 --- a/doubleml/data/panel_data.py +++ b/doubleml/data/panel_data.py @@ -67,8 +67,7 @@ class DoubleMLPanelData(DoubleMLData): ... y_col="y", ... d_cols="d", ... id_col="id", - ... t_col="t", - ... x_cols=["Z1", "Z2", "Z3", "Z4"], + ... t_col="t", ... x_cols=["Z1", "Z2", "Z3", "Z4"], ... datetime_unit="M" ... ) """ @@ -83,16 +82,20 @@ def __init__( x_cols=None, z_cols=None, use_other_treat_as_covariate=True, - force_all_x_finite=True, datetime_unit="M", + force_all_x_finite=True, + datetime_unit="M", ): DoubleMLBaseData.__init__(self, data) # we need to set id_col (needs _data) before call to the super __init__ because of the x_cols setter self.id_col = id_col self._datetime_unit = _is_valid_datetime_unit(datetime_unit) - self._set_id_var() # Set t_col first before calling parent constructor + self._set_id_var() + + # Set time column before calling parent constructor self.t_col = t_col + # Call parent constructor DoubleMLData.__init__( self, data=data, @@ -104,11 +107,14 @@ def __init__( force_all_x_finite=force_all_x_finite, force_all_d_finite=False, ) + + # Set time variable array after data is loaded + self._set_time_var() + if self.n_treat != 1: raise ValueError("Only one treatment column is allowed for panel data.") self._check_disjoint_sets_id_col() - self._set_t() # intialize the unique values of g and t self._g_values = np.sort(np.unique(self.d)) # unique values of g @@ -151,9 +157,8 @@ def datetime_unit(self): """ The unit of the time variable. """ - return self._datetime_unit + return self._datetime_unit @ property - @property def d(self): """ Array of treatment variable; @@ -171,7 +176,7 @@ def t(self): """ Array of time variable. """ - if pd.api.types.is_datetime64_any_dtype(self._d): + if pd.api.types.is_datetime64_any_dtype(self._t): return self._t.values.astype(f"datetime64[{self.datetime_unit}]") else: return self._t.values @@ -216,14 +221,15 @@ def n_obs(self): """ The number of observations. For panel data, the number of unique values for id_col. """ - return len(self._id_var_unique) @property + return len(self._id_var_unique) + + @property def g_col(self): """ The treatment variable indicating the time of treatment exposure. 
""" - return self._d_cols[0] + return self._d_cols[0] @ DoubleMLData.d_cols.setter - @DoubleMLData.d_cols.setter def d_cols(self, value): super(self.__class__, self.__class__).d_cols.__set__(self, value) if hasattr(self, "_g_values"): @@ -232,7 +238,8 @@ def d_cols(self, value): @property def g_values(self): """ - The unique values of the treatment variable (groups) ``d``. """ + The unique values of the treatment variable (groups) ``d``. + """ return self._g_values @property @@ -253,25 +260,19 @@ def t_col(self): def t_col(self, value): if value is None: raise TypeError("Invalid time variable t_col. Time variable required for panel data.") - reset_value = hasattr(self, "_t_col") if not isinstance(value, str): raise TypeError( - f"The time variable t_col must be of str type. {str(value)} of type {str(type(value))} was passed." + "The time variable t_col must be of str type. " f"{str(value)} of type {str(type(value))} was passed." ) - if value not in self.all_variables: - raise ValueError(f"Invalid time variable t_col. {value} is no data column.") + # Check if data exists (during initialization it might not) + if hasattr(self, "_data") and value not in self.all_variables: + raise ValueError("Invalid time variable t_col. The time variable is no data column.") self._t_col = value - if reset_value: - self._check_disjoint_sets() - self._set_t() - if hasattr(self, "_t_values"): - self._t_values = np.sort(np.unique(self.t)) # update unique values of t - - def _set_t(self): - """Set time variable.""" - if self.t_col is not None: - assert_all_finite(self.data.loc[:, self.t_col]) - self._t = self.data.loc[:, self.t_col] + # Update time variable array if data is already loaded + if hasattr(self, "_data"): + self._set_time_var() + if hasattr(self, "_t_values"): + self._t_values = np.sort(np.unique(self.t)) # update unique values of t @property def t_values(self): @@ -290,13 +291,14 @@ def n_t_periods(self): def _get_optional_col_sets(self): base_optional_col_sets = super()._get_optional_col_sets() id_col_set = {self.id_col} - t_col_set = {self.t_col} # t_col is not None for panel data + t_col_set = {self.t_col} return [id_col_set, t_col_set] + base_optional_col_sets def _check_disjoint_sets(self): # apply the standard checks from the DoubleMLData class super(DoubleMLPanelData, self)._check_disjoint_sets() self._check_disjoint_sets_id_col() + self._check_disjoint_sets_t_col() def _check_disjoint_sets_id_col(self): # apply the standard checks from the DoubleMLData class @@ -329,7 +331,38 @@ def _check_disjoint_sets_id_col(self): arg2="``id_col``", ) + def _check_disjoint_sets_t_col(self): + """Check that time column is disjoint from other variable sets.""" + t_col_set = {self.t_col} + y_col_set = {self.y_col} + x_cols_set = set(self.x_cols) + d_cols_set = set(self.d_cols) + z_cols_set = set(self.z_cols or []) + id_col_set = {self.id_col} + + t_checks_args = [ + (y_col_set, "outcome variable", "``y_col``"), + (d_cols_set, "treatment variable", "``d_cols``"), + (x_cols_set, "covariate", "``x_cols``"), + (z_cols_set, "instrumental variable", "``z_cols``"), + (id_col_set, "identifier variable", "``id_col``"), + ] + for set1, name, argument in t_checks_args: + self._check_disjoint( + set1=set1, + name1=name, + arg1=argument, + set2=t_col_set, + name2="time variable", + arg2="``t_col``", + ) + def _set_id_var(self): assert_all_finite(self.data.loc[:, self.id_col]) self._id_var = self.data.loc[:, self.id_col] self._id_var_unique = np.unique(self._id_var.values) + + def _set_time_var(self): + """Set the time 
variable array.""" + if hasattr(self, "_data") and self.t_col in self.data.columns: + self._t = self.data.loc[:, self.t_col] diff --git a/doubleml/data/rdd_data.py b/doubleml/data/rdd_data.py index 3798dd7e..ac0fff67 100644 --- a/doubleml/data/rdd_data.py +++ b/doubleml/data/rdd_data.py @@ -24,13 +24,13 @@ class DoubleMLRDDData(DoubleMLData): d_cols : str or list The treatment variable(s). - s_col : str + score_col : str The score/running variable for RDD models. x_cols : None, str or list The covariates. If ``None``, all variables (columns of ``data``) which are neither specified as outcome variable ``y_col``, nor - treatment variables ``d_cols``, nor instrumental variables ``z_cols``, nor score variable ``s_col`` are used as covariates. + treatment variables ``d_cols``, nor instrumental variables ``z_cols``, nor score variable ``score_col`` are used as covariates. Default is ``None``. z_cols : None, str or list @@ -74,7 +74,7 @@ def __init__( data, y_col, d_cols, - s_col, + score_col, x_cols=None, z_cols=None, cluster_cols=None, @@ -83,8 +83,8 @@ def __init__( force_all_d_finite=True, ): # Set score column before calling parent constructor - self.s_col = s_col - + self.score_col = score_col + # Call parent constructor super().__init__( data=data, @@ -97,7 +97,7 @@ def __init__( force_all_x_finite=force_all_x_finite, force_all_d_finite=force_all_d_finite, ) - + # Set score variable array after data is loaded self._set_score_var() @@ -107,7 +107,7 @@ def from_arrays( x, y, d, - s, + score, z=None, cluster_vars=None, use_other_treat_as_covariate=True, @@ -128,7 +128,7 @@ def from_arrays( d : :class:`numpy.ndarray` Array of treatment variables. - s : :class:`numpy.ndarray` + score : :class:`numpy.ndarray` Array of the score/running variable for RDD models. z : None or :class:`numpy.ndarray` @@ -157,31 +157,32 @@ def from_arrays( Default is ``True``. Examples - -------- >>> from doubleml import DoubleMLRDDData + -------- + >>> from doubleml import DoubleMLRDDData >>> from doubleml.rdd.datasets import make_rdd_data >>> (x, y, d, s) = make_rdd_data(return_type='array') >>> obj_dml_data_from_array = DoubleMLRDDData.from_arrays(x, y, d, s=s) """ # Prepare score variable - s = check_array(s, ensure_2d=False, allow_nd=False) - s = _assure_2d_array(s) - if s.shape[1] != 1: - raise ValueError("s must be a single column.") - s_col = "s" - + score = check_array(score, ensure_2d=False, allow_nd=False) + score = _assure_2d_array(score) + if score.shape[1] != 1: + raise ValueError("score must be a single column.") + score_col = "score" + # Create base data using parent class method base_data = DoubleMLData.from_arrays( x, y, d, z, cluster_vars, use_other_treat_as_covariate, force_all_x_finite, force_all_d_finite ) - + # Add score variable to the DataFrame - data = pd.concat((base_data.data, pd.DataFrame(s, columns=[s_col])), axis=1) - + data = pd.concat((base_data.data, pd.DataFrame(score, columns=[score_col])), axis=1) + return cls( data, base_data.y_col, base_data.d_cols, - s_col, + score_col, base_data.x_cols, base_data.z_cols, base_data.cluster_cols, @@ -191,49 +192,48 @@ def from_arrays( ) @property - def s_col(self): + def score_col(self): """ The score/running variable. """ - return self._s_col + return self._score_col - @s_col.setter - def s_col(self, value): + @score_col.setter + def score_col(self, value): if not isinstance(value, str): raise TypeError( - "The score variable s_col must be of str type. " - f"{str(value)} of type {str(type(value))} was passed." 
+ "The score variable score_col must be of str type. " f"{str(value)} of type {str(type(value))} was passed." ) # Check if data exists (during initialization it might not) - if hasattr(self, '_data') and value not in self.all_variables: - raise ValueError("Invalid score variable s_col. The score variable is no data column.") - self._s_col = value + if hasattr(self, "_data") and value not in self.all_variables: + raise ValueError("Invalid score variable score_col. The score variable is no data column.") + self._score_col = value # Update score variable array if data is already loaded - if hasattr(self, '_data'): + if hasattr(self, "_data"): self._set_score_var() @property - def s(self): + def score(self): """ Array of score/running variable. """ - return self._s.values + return self._score.values def _get_optional_col_sets(self): """Get optional column sets including score column.""" base_optional_col_sets = super()._get_optional_col_sets() - s_col_set = {self.s_col} - return [s_col_set] + base_optional_col_sets + score_col_set = {self.score_col} + return [score_col_set] + base_optional_col_sets def _check_disjoint_sets(self): """Check that score column doesn't overlap with other variables.""" # Apply standard checks from parent class super()._check_disjoint_sets() - self._check_disjoint_sets_s_col() + self._check_disjoint_sets_score_col() - def _check_disjoint_sets_s_col(self): + def _check_disjoint_sets_score_col(self): """Check that score column is disjoint from other variable sets.""" - s_col_set = {self.s_col} + score_col_set = {self.score_col} y_col_set = {self.y_col} x_cols_set = set(self.x_cols) d_cols_set = set(self.d_cols) @@ -252,21 +252,21 @@ def _check_disjoint_sets_s_col(self): set1=set1, name1=name, arg1=argument, - set2=s_col_set, + set2=score_col_set, name2="score variable", - arg2="``s_col``", + arg2="``score_col``", ) def _set_score_var(self): """Set the score variable array.""" - if hasattr(self, '_data') and self.s_col in self.data.columns: - self._s = self.data.loc[:, [self.s_col]] + if hasattr(self, "_data") and self.score_col in self.data.columns: + self._score = self.data.loc[:, [self.score_col]] def __str__(self): """String representation.""" data_summary = self._data_summary_str() buf = io.StringIO() print("================== DoubleMLRDDData Object ==================", file=buf) - print(f"Score variable: {self.s_col}", file=buf) + print(f"Score variable: {self.score_col}", file=buf) print(data_summary, file=buf) return buf.getvalue() diff --git a/doubleml/data/ssm_data.py b/doubleml/data/ssm_data.py index d8f3988e..301a4234 100644 --- a/doubleml/data/ssm_data.py +++ b/doubleml/data/ssm_data.py @@ -1,5 +1,4 @@ import io -import numpy as np import pandas as pd from sklearn.utils.validation import check_array @@ -30,7 +29,8 @@ class DoubleMLSSMData(DoubleMLData): x_cols : None, str or list The covariates. If ``None``, all variables (columns of ``data``) which are neither specified as outcome variable ``y_col``, nor - treatment variables ``d_cols``, nor instrumental variables ``z_cols``, nor selection variable ``s_col`` are used as covariates. + treatment variables ``d_cols``, nor instrumental variables ``z_cols``, nor selection variable ``s_col`` + are used as covariates. Default is ``None``. z_cols : None, str or list @@ -203,15 +203,14 @@ def s_col(self): def s_col(self, value): if not isinstance(value, str): raise TypeError( - "The selection variable s_col must be of str type. " - f"{str(value)} of type {str(type(value))} was passed." 
+ "The selection variable s_col must be of str type. " f"{str(value)} of type {str(type(value))} was passed." ) # Check if data exists (during initialization it might not) - if hasattr(self, '_data') and value not in self.all_variables: + if hasattr(self, "_data") and value not in self.all_variables: raise ValueError("Invalid selection variable s_col. The selection variable is no data column.") self._s_col = value # Update selection variable array if data is already loaded - if hasattr(self, '_data'): + if hasattr(self, "_data"): self._set_selection_var() @property @@ -261,7 +260,7 @@ def _check_disjoint_sets_s_col(self): def _set_selection_var(self): """Set the selection variable array.""" - if hasattr(self, '_data') and self.s_col in self.data.columns: + if hasattr(self, "_data") and self.s_col in self.data.columns: self._s = self.data.loc[:, [self.s_col]] def __str__(self): diff --git a/doubleml/data/tests/test_cluster_data.py b/doubleml/data/tests/test_cluster_data.py index 09a45ccd..4489e528 100644 --- a/doubleml/data/tests/test_cluster_data.py +++ b/doubleml/data/tests/test_cluster_data.py @@ -9,15 +9,13 @@ @pytest.mark.ci def test_obj_vs_from_arrays(): np.random.seed(3141) - (x, y, d, cluster_vars, z) = make_pliv_multiway_cluster_CKMS2021(N=10, M=10, return_type="array") - dml_data = DoubleMLData.from_arrays(x, y, d, z=z, cluster_vars=cluster_vars, is_cluster_data=True) + dml_data = make_pliv_multiway_cluster_CKMS2021(N=10, M=10) dml_data_from_array = DoubleMLData.from_arrays( dml_data.data[dml_data.x_cols], dml_data.data[dml_data.y_col], dml_data.data[dml_data.d_cols], - z=dml_data.data[dml_data.z_cols], - cluster_vars=dml_data.data[dml_data.cluster_cols], - is_cluster_data=True + dml_data.data[dml_data.cluster_cols], + dml_data.data[dml_data.z_cols], ) df = dml_data.data.copy() df.rename( @@ -30,9 +28,8 @@ def test_obj_vs_from_arrays(): dml_data.data[dml_data.x_cols], dml_data.data[dml_data.y_col], dml_data.data[dml_data.d_cols], - z=dml_data.data[dml_data.z_cols], - cluster_vars=dml_data.data[dml_data.cluster_cols[1]], - is_cluster_data=True + dml_data.data[dml_data.cluster_cols[1]], + dml_data.data[dml_data.z_cols], ) df = dml_data.data.copy().drop(columns="cluster_var_i") df.rename(columns={"cluster_var_j": "cluster_var", "Y": "y", "D": "d", "Z": "z"}, inplace=True) @@ -42,7 +39,7 @@ def test_obj_vs_from_arrays(): @pytest.mark.ci def test_x_cols_setter_defaults_w_cluster(): df = pd.DataFrame(np.tile(np.arange(6), (6, 1)), columns=["yy", "dd", "xx1", "xx2", "xx3", "cluster1"]) - dml_data = DoubleMLData(df, y_col="yy", d_cols="dd", cluster_cols="cluster1", is_cluster_data=True) + dml_data = DoubleMLData(df, y_col="yy", d_cols="dd", cluster_cols="cluster1") assert dml_data.x_cols == ["xx1", "xx2", "xx3"] dml_data.x_cols = ["xx1", "xx3"] assert dml_data.x_cols == ["xx1", "xx3"] @@ -51,53 +48,48 @@ def test_x_cols_setter_defaults_w_cluster(): # with instrument df = pd.DataFrame(np.tile(np.arange(6), (6, 1)), columns=["yy", "dd", "xx1", "xx2", "z", "cluster1"]) - dml_data = DoubleMLData(df, y_col="yy", d_cols="dd", cluster_cols="cluster1", z_cols="z", is_cluster_data=True) + dml_data = DoubleMLData(df, y_col="yy", d_cols="dd", cluster_cols="cluster1", z_cols="z") assert dml_data.x_cols == ["xx1", "xx2"] # without instrument and with time df = pd.DataFrame(np.tile(np.arange(6), (6, 1)), columns=["yy", "dd", "xx1", "xx2", "tt", "cluster1"]) - dml_data = DoubleMLData(df, y_col="yy", d_cols="dd", cluster_cols="cluster1", is_cluster_data=True) - assert dml_data.x_cols == ["xx1", "xx2", 
"tt"] + dml_data = DoubleMLData(df, y_col="yy", d_cols="dd", cluster_cols="cluster1", t_col="tt") + assert dml_data.x_cols == ["xx1", "xx2"] # with instrument and with time df = pd.DataFrame(np.tile(np.arange(7), (6, 1)), columns=["yy", "dd", "xx1", "xx2", "zz", "tt", "cluster1"]) - dml_data = DoubleMLData(df, y_col="yy", d_cols="dd", cluster_cols="cluster1", z_cols="zz", is_cluster_data=True) - assert dml_data.x_cols == ["xx1", "xx2", "tt"] + dml_data = DoubleMLData(df, y_col="yy", d_cols="dd", cluster_cols="cluster1", z_cols="zz", t_col="tt") + assert dml_data.x_cols == ["xx1", "xx2"] # without instrument and with selection df = pd.DataFrame(np.tile(np.arange(6), (6, 1)), columns=["yy", "dd", "xx1", "xx2", "ss", "cluster1"]) - dml_data = DoubleMLData(df, y_col="yy", d_cols="dd", cluster_cols="cluster1", is_cluster_data=True) - assert dml_data.x_cols == ["xx1", "xx2", "ss"] + dml_data = DoubleMLData(df, y_col="yy", d_cols="dd", cluster_cols="cluster1", s_col="ss") + assert dml_data.x_cols == ["xx1", "xx2"] # with instrument and with selection df = pd.DataFrame(np.tile(np.arange(7), (6, 1)), columns=["yy", "dd", "xx1", "xx2", "zz", "ss", "cluster1"]) - dml_data = DoubleMLData(df, y_col="yy", d_cols="dd", cluster_cols="cluster1", z_cols="zz", is_cluster_data=True) - assert dml_data.x_cols == ["xx1", "xx2", "ss"] + dml_data = DoubleMLData(df, y_col="yy", d_cols="dd", cluster_cols="cluster1", z_cols="zz", s_col="ss") + assert dml_data.x_cols == ["xx1", "xx2"] # without instrument with time with selection df = pd.DataFrame(np.tile(np.arange(7), (6, 1)), columns=["yy", "dd", "xx1", "xx2", "tt", "ss", "cluster1"]) - dml_data = DoubleMLData(df, y_col="yy", d_cols="dd", cluster_cols="cluster1", is_cluster_data=True) - assert dml_data.x_cols == ["xx1", "xx2", "tt", "ss"] + dml_data = DoubleMLData(df, y_col="yy", d_cols="dd", cluster_cols="cluster1", t_col="tt", s_col="ss") + assert dml_data.x_cols == ["xx1", "xx2"] # with instrument with time with selection df = pd.DataFrame(np.tile(np.arange(8), (6, 1)), columns=["yy", "dd", "xx1", "xx2", "zz", "tt", "ss", "cluster1"]) - dml_data = DoubleMLData(df, y_col="yy", d_cols="dd", cluster_cols="cluster1", z_cols="zz", is_cluster_data=True) - assert dml_data.x_cols == ["xx1", "xx2", "tt", "ss"] + dml_data = DoubleMLData(df, y_col="yy", d_cols="dd", cluster_cols="cluster1", z_cols="zz", t_col="tt", s_col="ss") + assert dml_data.x_cols == ["xx1", "xx2"] @pytest.mark.ci def test_cluster_cols_setter(): np.random.seed(3141) - (x, y, d) = make_plr_CCDDHNR2018(n_obs=100, return_type="array") - # Create a pandas DataFrame with X, y, and d columns - df = pd.DataFrame(np.column_stack((x[:, :7], y, d)), - columns=[f"X{i + 1}" for i in np.arange(7)] + ["y", "d1", "d2"]) - + dml_data = make_plr_CCDDHNR2018(n_obs=100) + df = dml_data.data.copy().iloc[:, :10] + df.columns = [f"X{i + 1}" for i in np.arange(7)] + ["y", "d1", "d2"] dml_data = DoubleMLData( - df, "y", ["d1", "d2"], - x_cols=[f"X{i + 1}" for i in np.arange(5)], - cluster_cols=[f"X{i + 1}" for i in [5, 6]], - is_cluster_data=True + df, "y", ["d1", "d2"], cluster_cols=[f"X{i + 1}" for i in [5, 6]], x_cols=[f"X{i + 1}" for i in np.arange(5)] ) cluster_vars = df[["X6", "X7"]].values @@ -137,49 +129,56 @@ def test_disjoint_sets(): r"and cluster variable\(s\) \(``cluster_cols``\)." 
) with pytest.raises(ValueError, match=msg): - _ = DoubleMLData(df, y_col="yy", d_cols=["dd1"], x_cols=["xx1", "xx2"], cluster_cols="yy", is_cluster_data=True) + _ = DoubleMLData(df, y_col="yy", d_cols=["dd1"], x_cols=["xx1", "xx2"], cluster_cols="yy") msg = ( r"At least one variable/column is set as treatment variable \(``d_cols``\) " r"and cluster variable\(s\) \(``cluster_cols``\)." ) with pytest.raises(ValueError, match=msg): - _ = DoubleMLData(df, y_col="yy", d_cols=["dd1"], x_cols=["xx1", "xx2"], cluster_cols="dd1", is_cluster_data=True) + _ = DoubleMLData(df, y_col="yy", d_cols=["dd1"], x_cols=["xx1", "xx2"], cluster_cols="dd1") msg = ( r"At least one variable/column is set as covariate \(``x_cols``\) " r"and cluster variable\(s\) \(``cluster_cols``\)." ) with pytest.raises(ValueError, match=msg): - _ = DoubleMLData(df, y_col="yy", d_cols=["dd1"], x_cols=["xx1", "xx2"], cluster_cols="xx2", is_cluster_data=True) + _ = DoubleMLData(df, y_col="yy", d_cols=["dd1"], x_cols=["xx1", "xx2"], cluster_cols="xx2") msg = ( r"At least one variable/column is set as instrumental variable \(``z_cols``\) " r"and cluster variable\(s\) \(``cluster_cols``\)." ) with pytest.raises(ValueError, match=msg): - _ = DoubleMLData(df, y_col="yy", d_cols=["dd1"], x_cols=["xx1"], z_cols=["xx2"], cluster_cols="xx2", is_cluster_data=True) + _ = DoubleMLData(df, y_col="yy", d_cols=["dd1"], x_cols=["xx1"], z_cols=["xx2"], cluster_cols="xx2") + + msg = ( + r"At least one variable/column is set as time variable \(``t_col``\) " + r"and cluster variable\(s\) \(``cluster_cols``\)." + ) + with pytest.raises(ValueError, match=msg): + _ = DoubleMLData(df, y_col="yy", d_cols=["dd1"], x_cols=["xx1"], t_col="xx2", cluster_cols="xx2") + + msg = ( + r"At least one variable/column is set as score or selection variable \(``s_col``\) " + r"and cluster variable\(s\) \(``cluster_cols``\)." + ) + with pytest.raises(ValueError, match=msg): + _ = DoubleMLData(df, y_col="yy", d_cols=["dd1"], x_cols=["xx1"], s_col="xx2", cluster_cols="xx2") @pytest.mark.ci def test_duplicates(): np.random.seed(3141) - (x, y, d, cluster_vars, z) = make_pliv_multiway_cluster_CKMS2021(N=10, M=10, return_type="array") - df = pd.DataFrame(np.column_stack((x, y, d, z)), - columns=[f"X{i+1}" for i in range(x.shape[1])] + ["Y", "D", "Z"]) - cluster_df = pd.DataFrame(cluster_vars, columns=["cluster_var_i", "cluster_var_j"]) - data = pd.concat([df, cluster_df], axis=1) + dml_cluster_data = make_pliv_multiway_cluster_CKMS2021(N=10, M=10) msg = r"Invalid cluster variable\(s\) cluster_cols: Contains duplicate values." with pytest.raises(ValueError, match=msg): - _ = DoubleMLData(data, y_col="Y", d_cols=["D"], cluster_cols=["X3", "X2", "X3"], is_cluster_data=True) - - dml_data = DoubleMLData(data, y_col="Y", d_cols=["D"], cluster_cols=["X3", "X2"], is_cluster_data=True) + _ = DoubleMLData(dml_cluster_data.data, y_col="y", d_cols=["d"], cluster_cols=["X3", "X2", "X3"]) with pytest.raises(ValueError, match=msg): - dml_data.cluster_cols = ["X3", "X2", "X3"] + dml_cluster_data.cluster_cols = ["X3", "X2", "X3"] msg = "Invalid pd.DataFrame: Contains duplicate column names." 
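For orientation, a minimal illustrative sketch of the consolidated interface that these tests exercise (not part of the patch; it assumes ``DoubleMLData`` accepts ``cluster_cols`` directly, as in the fixtures above):

import numpy as np
import pandas as pd
from doubleml import DoubleMLData

# toy frame with a single cluster column, mirroring the test fixtures above
df = pd.DataFrame(np.tile(np.arange(6), (6, 1)), columns=["yy", "dd", "xx1", "xx2", "xx3", "cluster1"])
dml_data = DoubleMLData(df, y_col="yy", d_cols="dd", cluster_cols="cluster1")
print(dml_data.cluster_cols)  # expected: ['cluster1']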
with pytest.raises(ValueError, match=msg): _ = DoubleMLData( - pd.DataFrame(np.zeros((100, 5)), columns=["y", "d", "X3", "X2", "y"]), - y_col="y", d_cols=["d"], cluster_cols=["X2"], is_cluster_data=True + pd.DataFrame(np.zeros((100, 5)), columns=["y", "d", "X3", "X2", "y"]), y_col="y", d_cols=["d"], cluster_cols=["X2"] ) @@ -187,14 +186,13 @@ def test_duplicates(): def test_dml_datatype(): data_array = np.zeros((100, 10)) with pytest.raises(TypeError): - _ = DoubleMLData(data_array, y_col="y", d_cols=["d"], cluster_cols=["X3", "X2"], is_cluster_data=True) + _ = DoubleMLData(data_array, y_col="y", d_cols=["d"], cluster_cols=["X3", "X2"]) @pytest.mark.ci def test_cluster_data_str(): np.random.seed(3141) - (x, y, d, cluster_vars, z) = make_pliv_multiway_cluster_CKMS2021(N=10, M=10, return_type="array") - dml_data = DoubleMLData.from_arrays(x, y, d, z=z, cluster_vars=cluster_vars, is_cluster_data=True) + dml_data = make_pliv_multiway_cluster_CKMS2021(N=10, M=10) # Convert the object to string dml_str = str(dml_data) @@ -205,11 +203,28 @@ def test_cluster_data_str(): assert "------------------ DataFrame info ------------------" in dml_str # Check that specific data attributes are correctly included - assert "Outcome variable: y" in dml_str - assert "Treatment variable(s): ['d']" in dml_str - assert "Cluster variable(s): ['cluster_var1', 'cluster_var2']" in dml_str + assert "Outcome variable: Y" in dml_str + assert "Treatment variable(s): ['D']" in dml_str + assert "Cluster variable(s): ['cluster_var_i', 'cluster_var_j']" in dml_str assert "Covariates: " in dml_str - assert "Instrument variable(s): ['z']" in dml_str - assert "Is cluster data: True" in dml_str - assert "No. Observations:" in dml_str # There's no TimeData or ScoreData here anymore, so the test is complete - # The specialized data classes will be tested in their own test files + assert "Instrument variable(s): ['Z']" in dml_str + assert "No. 
Observations:" in dml_str + + # Test with additional optional attributes + df = dml_data.data.copy() + df["time_var"] = 1 + df["score_var"] = 0.5 + + dml_data_with_optional = DoubleMLData( + data=df, + y_col="Y", + d_cols="D", + cluster_cols=["cluster_var_i", "cluster_var_j"], + z_cols="Z", + t_col="time_var", + s_col="score_var", + ) + + dml_str_optional = str(dml_data_with_optional) + assert "Time variable: time_var" in dml_str_optional + assert "Score/Selection variable: score_var" in dml_str_optional diff --git a/doubleml/data/tests/test_dml_data.py b/doubleml/data/tests/test_dml_data.py index a2ada74b..7dc94fd4 100644 --- a/doubleml/data/tests/test_dml_data.py +++ b/doubleml/data/tests/test_dml_data.py @@ -3,7 +3,8 @@ import pytest from sklearn.linear_model import Lasso, LogisticRegression -from doubleml import DoubleMLData, DoubleMLDIDCS, DoubleMLPLR, DoubleMLSSM +from doubleml import DoubleMLData, DoubleMLDIDCS, DoubleMLPLR, DoubleMLSSM, DoubleMLDIDData, DoubleMLSSMData + from doubleml.data.base_data import DoubleMLBaseData from doubleml.plm.datasets import ( _make_pliv_data, @@ -102,7 +103,7 @@ def test_obj_vs_from_arrays(): assert np.array_equal(dml_data_from_array.data, dml_data.data) dml_data = make_did_SZ2020(n_obs=100, cross_sectional_data=True) - dml_data_from_array = DoubleMLData.from_arrays( + dml_data_from_array = DoubleMLDIDData.from_arrays( x=dml_data.data[dml_data.x_cols], y=dml_data.data[dml_data.y_col], d=dml_data.data[dml_data.d_cols], @@ -113,7 +114,7 @@ def test_obj_vs_from_arrays(): # check with instrument and time variable dml_data = make_did_SZ2020(n_obs=100, cross_sectional_data=True) dml_data.data["z"] = dml_data.data["t"] - dml_data_from_array = DoubleMLData.from_arrays( + dml_data_from_array = DoubleMLDIDData.from_arrays( x=dml_data.data[dml_data.x_cols], y=dml_data.data[dml_data.y_col], d=dml_data.data[dml_data.d_cols], @@ -146,14 +147,11 @@ def test_dml_data_no_instr_no_time_no_selection(): dml_data = make_plr_CCDDHNR2018(n_obs=100) assert dml_data.z is None assert dml_data.n_instr == 0 - assert dml_data.t is None x, y, d = make_plr_CCDDHNR2018(n_obs=100, return_type="array") dml_data = DoubleMLData.from_arrays(x, y, d) assert dml_data.z is None assert dml_data.n_instr == 0 - assert dml_data.t is None - assert dml_data.s is None @pytest.mark.ci @@ -324,7 +322,7 @@ def test_t_col_setter(): np.random.seed(3141) df = make_did_SZ2020(n_obs=100, cross_sectional_data=True, return_type=pd.DataFrame) df["t_new"] = np.ones(shape=(100,)) - dml_data = DoubleMLData(df, "y", "d", [f"Z{i + 1}" for i in np.arange(4)], t_col="t") + dml_data = DoubleMLDIDData(df, "y", "d", z_cols=[f"Z{i + 1}" for i in np.arange(4)], t_col="t") # check that after changing t_col, the t array gets updated t_comp = dml_data.data["t_new"].values @@ -349,7 +347,7 @@ def test_s_col_setter(): np.random.seed(3141) df = make_ssm_data(n_obs=100, return_type=pd.DataFrame) df["s_new"] = np.ones(shape=(100,)) - dml_data = DoubleMLData(df, "y", "d", [f"X{i + 1}" for i in np.arange(4)], s_col="s") + dml_data = DoubleMLSSMData(df, "y", "d", x_cols=[f"X{i + 1}" for i in np.arange(4)], s_col="s") # check that after changing s_col, the s array gets updated s_comp = dml_data.data["s_new"].values diff --git a/doubleml/did/datasets/dgp_did_SZ2020.py b/doubleml/did/datasets/dgp_did_SZ2020.py index ccfd4a80..0c0f31ab 100644 --- a/doubleml/did/datasets/dgp_did_SZ2020.py +++ b/doubleml/did/datasets/dgp_did_SZ2020.py @@ -2,13 +2,14 @@ import pandas as pd from scipy.linalg import toeplitz -from ...data.base_data 
import DoubleMLData +from ...data.did_data import DoubleMLDIDData from ...data.panel_data import DoubleMLPanelData -from ...utils._aliases import _get_array_alias, _get_data_frame_alias, _get_dml_data_alias +from ...utils._aliases import _get_array_alias, _get_data_frame_alias, _get_dml_did_data_alias, _get_dml_panel_data_alias _array_alias = _get_array_alias() _data_frame_alias = _get_data_frame_alias() -_dml_data_alias = _get_dml_data_alias() +_dml_did_data_alias = _get_dml_did_data_alias() +_dml_panel_data_alias = _get_dml_panel_data_alias() def _generate_features(n_obs, c, dim_x=4): @@ -60,7 +61,7 @@ def _f_ps(w, xi): return res -def make_did_SZ2020(n_obs=500, dgp_type=1, cross_sectional_data=False, return_type="DoubleMLData", **kwargs): +def make_did_SZ2020(n_obs=500, dgp_type=1, cross_sectional_data=False, return_type="DoubleMLDIDData", **kwargs): """ Generates data from a difference-in-differences model used in Sant'Anna and Zhao (2020). The data generating process is defined as follows. For a generic :math:`W=(W_1, W_2, W_3, W_4)^T`, let @@ -130,7 +131,7 @@ def make_did_SZ2020(n_obs=500, dgp_type=1, cross_sectional_data=False, return_ty cross_sectional_data : Indicates whether the setting is uses cross-sectional or panel data. Default value is ``False``. return_type : - If ``'DoubleMLData'`` or ``DoubleMLData``, returns a ``DoubleMLData`` object. + If ``'DoubleMLDIDData'`` or ``DoubleMLDIDData``, returns a ``DoubleMLDIDData`` object. If ``'DataFrame'``, ``'pd.DataFrame'`` or ``pd.DataFrame``, returns a ``pd.DataFrame``. @@ -181,13 +182,16 @@ def make_did_SZ2020(n_obs=500, dgp_type=1, cross_sectional_data=False, return_ty if return_type in _array_alias: return z, y, d, None - elif return_type in _data_frame_alias + _dml_data_alias: + elif return_type in _data_frame_alias + _dml_did_data_alias + _dml_panel_data_alias: z_cols = [f"Z{i + 1}" for i in np.arange(dim_x)] data = pd.DataFrame(np.column_stack((z, y, d)), columns=z_cols + ["y", "d"]) if return_type in _data_frame_alias: return data + elif return_type in _dml_did_data_alias: + return DoubleMLDIDData(data, "y", "d", z_cols=z_cols, t_col="t") else: - return DoubleMLData(data, "y", "d", z_cols) + assert return_type in _dml_panel_data_alias + return DoubleMLPanelData(data, "y", "d", t_col="t", id_col="id", x_cols=z_cols) elif return_type == "DoubleMLPanelData": z_cols = [f"Z{i + 1}" for i in np.arange(dim_x)] df0 = ( @@ -227,12 +231,15 @@ def make_did_SZ2020(n_obs=500, dgp_type=1, cross_sectional_data=False, return_ty if return_type in _array_alias: return z, y, d, t - elif return_type in _data_frame_alias + _dml_data_alias: + elif return_type in _data_frame_alias + _dml_did_data_alias + _dml_panel_data_alias: z_cols = [f"Z{i + 1}" for i in np.arange(dim_x)] data = pd.DataFrame(np.column_stack((z, y, d, t)), columns=z_cols + ["y", "d", "t"]) if return_type in _data_frame_alias: return data + elif return_type in _dml_did_data_alias: + return DoubleMLDIDData(data, "y", "d", z_cols=z_cols, t_col="t") else: - return DoubleMLData(data, "y", "d", z_cols, t_col="t") + assert return_type in _dml_panel_data_alias + return DoubleMLPanelData(data, "y", "d", t_col="t", id_col="id", x_cols=z_cols) else: raise ValueError("Invalid return_type.") diff --git a/doubleml/plm/datasets/dgp_pliv_multiway_cluster_CKMS2021.py b/doubleml/plm/datasets/dgp_pliv_multiway_cluster_CKMS2021.py index a882c678..df2b4cbe 100644 --- a/doubleml/plm/datasets/dgp_pliv_multiway_cluster_CKMS2021.py +++ b/doubleml/plm/datasets/dgp_pliv_multiway_cluster_CKMS2021.py @@ 
-2,7 +2,7 @@ import pandas as pd from scipy.linalg import toeplitz -from doubleml.data import DoubleMLData +from doubleml.data import DoubleMLClusterData from doubleml.utils._aliases import _array_alias, _data_frame_alias, _dml_cluster_data_alias @@ -184,7 +184,9 @@ def make_pliv_multiway_cluster_CKMS2021(N=25, M=25, dim_X=100, theta=1.0, return y = d * theta + np.matmul(x, zeta_0) + eps cluster_cols = ["cluster_var_i", "cluster_var_j"] - cluster_vars = pd.MultiIndex.from_product([range(N), range(M)]).to_frame(name=cluster_cols).reset_index(drop=True) if return_type in _array_alias: + cluster_vars = pd.MultiIndex.from_product([range(N), range(M)]).to_frame(name=cluster_cols).reset_index(drop=True) + + if return_type in _array_alias: return x, y, d, cluster_vars.values, z elif return_type in _data_frame_alias + _dml_cluster_data_alias: x_cols = [f"X{i + 1}" for i in np.arange(dim_X)] @@ -192,6 +194,6 @@ def make_pliv_multiway_cluster_CKMS2021(N=25, M=25, dim_X=100, theta=1.0, return if return_type in _data_frame_alias: return data else: - return DoubleMLData(data, "Y", "D", x_cols, "Z", cluster_cols, is_cluster_data=True) + return DoubleMLClusterData(data, "Y", "D", cluster_cols, x_cols, "Z") else: raise ValueError("Invalid return_type.") diff --git a/doubleml/tests/test_exceptions_fixed.py b/doubleml/tests/test_exceptions_fixed.py new file mode 100644 index 00000000..e69de29b diff --git a/doubleml/tests/test_multiway_cluster.py b/doubleml/tests/test_multiway_cluster.py index c3425239..10e5d445 100644 --- a/doubleml/tests/test_multiway_cluster.py +++ b/doubleml/tests/test_multiway_cluster.py @@ -18,10 +18,9 @@ M = 25 # number of observations (second dimension) dim_x = 100 # dimension of x -(x, y, d, cluster_vars, z) = make_pliv_multiway_cluster_CKMS2021(N, M, dim_x, return_type="array") -obj_dml_cluster_data = dml.DoubleMLData.from_arrays(x, y, d, z=z, cluster_vars=cluster_vars, is_cluster_data=True) +obj_dml_cluster_data = make_pliv_multiway_cluster_CKMS2021(N, M, dim_x) -(x, y, d, cluster_vars, z) = make_pliv_multiway_cluster_CKMS2021( +obj_dml_oneway_cluster_data = make_pliv_multiway_cluster_CKMS2021( N, M, dim_x, @@ -29,11 +28,9 @@ omega_epsilon=np.array([0.25, 0]), omega_v=np.array([0.25, 0]), omega_V=np.array([0.25, 0]), - return_type="array" ) -obj_dml_oneway_cluster_data = dml.DoubleMLData.from_arrays(x, y, d, z=z, cluster_vars=cluster_vars, is_cluster_data=True) # only the first cluster variable is relevant with the weight setting above -obj_dml_oneway_cluster_data.cluster_cols = "cluster_var1" +obj_dml_oneway_cluster_data.cluster_cols = "cluster_var_i" @pytest.fixture( diff --git a/doubleml/tests/test_nonlinear_cluster.py b/doubleml/tests/test_nonlinear_cluster.py index 9a2c585a..71998941 100644 --- a/doubleml/tests/test_nonlinear_cluster.py +++ b/doubleml/tests/test_nonlinear_cluster.py @@ -7,7 +7,7 @@ from sklearn.linear_model import Lasso, LinearRegression import doubleml as dml -from doubleml import DoubleMLData +from doubleml import DoubleMLClusterData from doubleml.plm.datasets import make_pliv_multiway_cluster_CKMS2021 from .test_nonlinear_score_mixin import DoubleMLPLRWithNonLinearScoreMixin @@ -20,7 +20,7 @@ # create data without insturment for plr x, y, d, cluster_vars, z = make_pliv_multiway_cluster_CKMS2021(N, M, dim_x, return_type="array") -obj_dml_cluster_data = DoubleMLData.from_arrays(x, y, d, cluster_vars=cluster_vars, is_cluster_data=True) +obj_dml_cluster_data = DoubleMLClusterData.from_arrays(x, y, d, cluster_vars) x, y, d, cluster_vars, z = 
make_pliv_multiway_cluster_CKMS2021( N, @@ -32,7 +32,7 @@ omega_V=np.array([0.25, 0]), return_type="array", ) -obj_dml_oneway_cluster_data = DoubleMLData.from_arrays(x, y, d, cluster_vars=cluster_vars, is_cluster_data=True) +obj_dml_oneway_cluster_data = DoubleMLClusterData.from_arrays(x, y, d, cluster_vars) # only the first cluster variable is relevant with the weight setting above obj_dml_oneway_cluster_data.cluster_cols = "cluster_var1" @@ -188,14 +188,15 @@ def dml_plr_cluster_nonlinear_with_index(generate_data1, learner): # Set machine learning methods for m & l ml_l = clone(learner) - ml_m = clone(learner) + ml_m = clone(learner) + obj_dml_data = dml.DoubleMLData(data, "y", ["d"], x_cols) np.random.seed(3141) dml_plr_obj = DoubleMLPLRWithNonLinearScoreMixin(obj_dml_data, ml_l, ml_m, n_folds=n_folds) dml_plr_obj.fit() - + df = data.reset_index() - dml_cluster_data = dml.DoubleMLData(df, y_col="y", d_cols="d", x_cols=x_cols, cluster_cols="index", is_cluster_data=True) + dml_cluster_data = dml.DoubleMLClusterData(df, y_col="y", d_cols="d", x_cols=x_cols, cluster_cols="index") np.random.seed(3141) dml_plr_cluster_obj = DoubleMLPLRWithNonLinearScoreMixin(dml_cluster_data, ml_l, ml_m, n_folds=n_folds) dml_plr_cluster_obj.fit() diff --git a/doubleml/tests/test_return_types_fixed.py b/doubleml/tests/test_return_types_fixed.py new file mode 100644 index 00000000..e69de29b diff --git a/doubleml/tests/test_sensitivity_cluster.py b/doubleml/tests/test_sensitivity_cluster.py index a4b46e1a..83f8c270 100644 --- a/doubleml/tests/test_sensitivity_cluster.py +++ b/doubleml/tests/test_sensitivity_cluster.py @@ -17,7 +17,7 @@ (x, y, d, cluster_vars, z) = make_pliv_multiway_cluster_CKMS2021(N, M, dim_x, return_type="array") -obj_dml_cluster_data = dml.DoubleMLData.from_arrays(x, y, d, z=None, cluster_vars=cluster_vars, is_cluster_data=True) +obj_dml_cluster_data = dml.DoubleMLClusterData.from_arrays(x, y, d, cluster_vars) (x, y, d, cluster_vars, z) = make_pliv_multiway_cluster_CKMS2021( N, @@ -29,7 +29,7 @@ omega_V=np.array([0.25, 0]), return_type="array", ) -obj_dml_oneway_cluster_data = dml.DoubleMLData.from_arrays(x, y, d, z=None, cluster_vars=cluster_vars, is_cluster_data=True) +obj_dml_oneway_cluster_data = dml.DoubleMLClusterData.from_arrays(x, y, d, cluster_vars) # only the first cluster variable is relevant with the weight setting above obj_dml_oneway_cluster_data.cluster_cols = "cluster_var1" diff --git a/doubleml/utils/_aliases.py b/doubleml/utils/_aliases.py index 679c80d3..b1dcaa21 100644 --- a/doubleml/utils/_aliases.py +++ b/doubleml/utils/_aliases.py @@ -1,13 +1,23 @@ import numpy as np import pandas as pd -from doubleml.data import DoubleMLData +from doubleml.data import ( + DoubleMLClusterData, + DoubleMLData, + DoubleMLDIDData, + DoubleMLPanelData, + DoubleMLRDDData, + DoubleMLSSMData, +) _array_alias = ["array", "np.ndarray", "np.array", np.ndarray] _data_frame_alias = ["DataFrame", "pd.DataFrame", pd.DataFrame] _dml_data_alias = ["DoubleMLData", DoubleMLData] -# For backwards compatibility, DoubleMLClusterData is now an alias for DoubleMLData with is_cluster_data=True -_dml_cluster_data_alias = ["DoubleMLClusterData", "DoubleMLData"] +_dml_did_data_alias = ["DoubleMLDIDData", DoubleMLDIDData] +_dml_panel_data_alias = ["DoubleMLPanelData", DoubleMLPanelData] +_dml_rdd_data_alias = ["DoubleMLRDDData", DoubleMLRDDData] +_dml_ssm_data_alias = ["DoubleMLSSMData", DoubleMLSSMData] +_dml_cluster_data_alias = ["DoubleMLClusterData", DoubleMLClusterData] def _get_array_alias(): @@ -28,3 
+38,23 @@ def _get_dml_data_alias(): def _get_dml_cluster_data_alias(): """Returns the list of DoubleMLClusterData aliases.""" return _dml_cluster_data_alias + + +def _get_dml_did_data_alias(): + """Returns the list of DoubleMLDIDData aliases.""" + return _dml_did_data_alias + + +def _get_dml_panel_data_alias(): + """Returns the list of DoubleMLPanelData aliases.""" + return _dml_panel_data_alias + + +def _get_dml_rdd_data_alias(): + """Returns the list of DoubleMLRDDData aliases.""" + return _dml_rdd_data_alias + + +def _get_dml_ssm_data_alias(): + """Returns the list of DoubleMLSSMData aliases.""" + return _dml_ssm_data_alias diff --git a/doubleml/utils/_check_return_types_fixed.py b/doubleml/utils/_check_return_types_fixed.py new file mode 100644 index 00000000..e69de29b From 97abdd87dfba71d8c7c7f5fb9cd3b2bde66243e9 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Thu, 5 Jun 2025 15:59:55 +0200 Subject: [PATCH 25/84] update data backends --- doubleml/data/base_data.py | 41 ++++++++-- doubleml/data/did_data.py | 101 +++++++++++++++--------- doubleml/data/ssm_data.py | 89 +++++++++++++++------ doubleml/data/tests/test_dml_data.py | 52 +++++------- doubleml/did/datasets/dgp_did_SZ2020.py | 17 ++-- 5 files changed, 187 insertions(+), 113 deletions(-) diff --git a/doubleml/data/base_data.py b/doubleml/data/base_data.py index 8d585633..9ba8bc00 100644 --- a/doubleml/data/base_data.py +++ b/doubleml/data/base_data.py @@ -4,6 +4,7 @@ import numpy as np import pandas as pd from sklearn.utils import assert_all_finite +from sklearn.utils.multiclass import type_of_target from sklearn.utils.validation import check_array, check_consistent_length, column_or_1d from doubleml.utils._estimation import _assure_2d_array @@ -11,8 +12,9 @@ class DoubleMLBaseData(ABC): """Bas x_cols = [f"X{i + 1}" for i in np.arange(x.shape[1])] - # baseline version with features, outcome and treatments - data = pd.DataFrame(np.column_stack((x, y, d)), columns=x_cols + [y_col] + d_cols)Class Double machine learning data-backends""" + # baseline version with features, outcome and treatments + data = pd.DataFrame(np.column_stack((x, y, d)), columns=x_cols + [y_col] + d_cols)Class Double machine learning data-backends + """ def __init__(self, data): if not isinstance(data, pd.DataFrame): @@ -108,11 +110,11 @@ class DoubleMLData(DoubleMLBaseData): Default is ``None``. cluster_cols : None, str or list The cluster variable(s). Default is ``None``. - + use_other_treat_as_covariate : bool Indicates whether in the multiple-treatment case the other treatment variables should be added as covariates. Default is ``True``. - + is_cluster_data : bool Flag indicating whether this data object is being used for cluster data. Default is ``False``. @@ -191,7 +193,7 @@ def __str__(self): + "\n------------------ DataFrame info ------------------\n" + df_info ) - return res + return res def _data_summary_str(self): data_summary = ( @@ -204,7 +206,7 @@ def _data_summary_str(self): if self.cluster_cols is not None: data_summary += f"Cluster variable(s): {self.cluster_cols}\n" - if hasattr(self, 'is_cluster_data') and self.is_cluster_data: + if hasattr(self, "is_cluster_data") and self.is_cluster_data: data_summary += f"Is cluster data: {self.is_cluster_data}\n" data_summary += f"No. 
Observations: {self.n_obs}\n" return data_summary @@ -328,7 +330,7 @@ def from_arrays( else: d_cols = [f"d{i + 1}" for i in np.arange(d.shape[1])] - x_cols = [f"X{i + 1}" for i in np.arange(x.shape[1])] # baseline version with features, outcome and treatments + x_cols = [f"X{i + 1}" for i in np.arange(x.shape[1])] # baseline version with features, outcome and treatments data = pd.DataFrame(np.column_stack((x, y, d)), columns=x_cols + [y_col] + d_cols) if z is not None: df_z = pd.DataFrame(z, columns=z_cols) @@ -708,6 +710,31 @@ def _get_optional_col_sets(self): cluster_cols_set = set(self.cluster_cols or []) return [cluster_cols_set, z_cols_set] + def _check_binary_treats(self): + is_binary = pd.Series(dtype=bool, index=self.d_cols) + if not self.force_all_d_finite: + is_binary[:] = False # if we allow infinite values, we cannot check for binary + else: + for treatment_var in self.d_cols: + this_d = self.data.loc[:, treatment_var] + binary_treat = type_of_target(this_d) == "binary" + zero_one_treat = np.all((np.power(this_d, 2) - this_d) == 0) + is_binary[treatment_var] = binary_treat & zero_one_treat + return is_binary + + def _check_binary_outcome(self): + y = self.data.loc[:, self.y_col] + binary_outcome = type_of_target(y) == "binary" + zero_one_outcome = np.all((np.power(y, 2) - y) == 0) + is_binary = binary_outcome & zero_one_outcome + return is_binary + + @staticmethod + def _check_disjoint(set1, set2, name1, arg1, name2, arg2): + """Helper method to check for disjoint sets.""" + if not set1.isdisjoint(set2): + raise ValueError(f"At least one variable/column is set as {name1} ({arg1}) and {name2} ({arg2}).") + def _check_disjoint_sets(self): # this function can be extended in inherited subclasses self._check_disjoint_sets_y_d_x() diff --git a/doubleml/data/did_data.py b/doubleml/data/did_data.py index b528ead8..c7909b4e 100644 --- a/doubleml/data/did_data.py +++ b/doubleml/data/did_data.py @@ -1,9 +1,12 @@ import io import pandas as pd from sklearn.utils.validation import check_array +from sklearn.utils import assert_all_finite from doubleml.data.base_data import DoubleMLData from doubleml.utils._estimation import _assure_2d_array +from sklearn.utils.validation import check_array, check_consistent_length, column_or_1d +from sklearn.utils.multiclass import type_of_target class DoubleMLDIDData(DoubleMLData): @@ -80,10 +83,12 @@ def __init__( use_other_treat_as_covariate=True, force_all_x_finite=True, force_all_d_finite=True, - ): - # Initialize _t_col to None first to avoid AttributeError during parent init + ): # Initialize _t_col to None first to avoid AttributeError during parent init self._t_col = None - + + # Store whether x_cols was originally None to reset it later + x_cols_was_none = x_cols is None + # Call parent constructor first to set _data super().__init__( data=data, @@ -97,8 +102,25 @@ def __init__( force_all_d_finite=force_all_d_finite, ) - # Set time column after parent constructor (which sets _data) - self.t_col = t_col + # Set time column directly to avoid triggering checks during init + if t_col is not None: + if not isinstance(t_col, str): + raise TypeError( + "The time variable t_col must be of str type (or None). " + f"{str(t_col)} of type {str(type(t_col))} was passed." + ) + if t_col not in self.all_variables: + raise ValueError(f"Invalid time variable t_col. 
{t_col} is no data column.") + self._t_col = t_col + + # If x_cols was originally None, reset it to exclude the time column + if x_cols_was_none and t_col is not None: + self.x_cols = None + + # Now run the checks and set variables + if t_col is not None: + self._check_disjoint_sets() + self._set_y_z_t() # Set time variable array after data is loaded self._set_time_var() @@ -109,8 +131,8 @@ def from_arrays( x, y, d, - t, z=None, + t=None, cluster_vars=None, use_other_treat_as_covariate=True, force_all_x_finite=True, @@ -165,11 +187,13 @@ def from_arrays( >>> obj_dml_data_from_array = DoubleMLDIDData.from_arrays(x, y, d, t=t) """ # Prepare time variable - t = check_array(t, ensure_2d=False, allow_nd=False) - t = _assure_2d_array(t) - if t.shape[1] != 1: - raise ValueError("t must be a single column.") - t_col = "t" + + if t is None: + t_col = None + else: + t = column_or_1d(t, warn=True) + check_consistent_length(x, y, d, t) + t_col = "t" # Create base data using parent class method base_data = DoubleMLData.from_arrays( @@ -179,13 +203,16 @@ def from_arrays( # Add time variable to the DataFrame data = pd.concat((base_data.data, pd.DataFrame(t, columns=[t_col])), axis=1) + if t is not None: + data[t_col] = t + return cls( data, base_data.y_col, base_data.d_cols, - t_col, base_data.x_cols, base_data.z_cols, + t_col, base_data.cluster_cols, base_data.use_other_treat_as_covariate, base_data.force_all_x_finite, @@ -199,30 +226,6 @@ def t_col(self): """ return self._t_col - @t_col.setter - def t_col(self, value): - if not isinstance(value, str): - raise TypeError( - "The time variable t_col must be of str type. " f"{str(value)} of type {str(type(value))} was passed." - ) - # Check if data exists (during initialization it might not) - if hasattr(self, "_data") and value not in self.all_variables: - raise ValueError("Invalid time variable t_col. The time variable is no data column.") - self._t_col = value - # Update time variable array if data is already loaded - if hasattr(self, "_data"): - self._set_time_var() - - @property - def t(self): - """ - Array of time variable. - """ - if self.t_col is not None: - return self._t.values - else: - return None - @t_col.setter def t_col(self, value): reset_value = hasattr(self, "_t_col") @@ -239,8 +242,18 @@ def t_col(self, value): self._t_col = None if reset_value: self._check_disjoint_sets() - self._set_y_z_t_s() + self._set_y_z_t() + + @property + def t(self): + """ + Array of time variable. 
+ """ + if self.t_col is not None: + return self._t.values + else: + return None def _get_optional_col_sets(self): """Get optional column sets including time column.""" @@ -254,7 +267,8 @@ def _check_disjoint_sets(self): """Check that time column doesn't overlap with other variables.""" # Apply standard checks from parent class super()._check_disjoint_sets() - self._check_disjoint_sets_t_col() + if self.t_col is not None: + self._check_disjoint_sets_t_col() def _check_disjoint_sets_t_col(self): """Check that time column is disjoint from other variable sets.""" @@ -287,6 +301,17 @@ def _set_time_var(self): if hasattr(self, "_data") and self.t_col in self.data.columns: self._t = self.data.loc[:, [self.t_col]] + def _set_y_z_t(self): + def _set_attr(col): + if col is None: + return None + assert_all_finite(self.data.loc[:, col]) + return self.data.loc[:, col] + + self._y = _set_attr(self.y_col) + self._z = _set_attr(self.z_cols) + self._t = _set_attr(self.t_col) + def __str__(self): """String representation.""" data_summary = self._data_summary_str() diff --git a/doubleml/data/ssm_data.py b/doubleml/data/ssm_data.py index 301a4234..425fbde5 100644 --- a/doubleml/data/ssm_data.py +++ b/doubleml/data/ssm_data.py @@ -1,6 +1,7 @@ import io import pandas as pd from sklearn.utils.validation import check_array +from sklearn.utils import assert_all_finite from doubleml.data.base_data import DoubleMLData from doubleml.utils._estimation import _assure_2d_array @@ -66,8 +67,7 @@ class DoubleMLSSMData(DoubleMLData): >>> df = make_ssm_data(return_type='DataFrame') >>> obj_dml_data_from_df = DoubleMLSSMData(df, 'y', 'd', 's') >>> # initialization from np.ndarray - >>> (x, y, d, s) = make_ssm_data(return_type='array') - >>> obj_dml_data_from_array = DoubleMLSSMData.from_arrays(x, y, d, s=s) + >>> (x, y, d, s) = make_ssm_data(return_type='array') >>> obj_dml_data_from_array = DoubleMLSSMData.from_arrays(x, y, d, s=s) """ def __init__( @@ -83,8 +83,11 @@ def __init__( force_all_x_finite=True, force_all_d_finite=True, ): - # Set selection column before calling parent constructor - self.s_col = s_col + # Initialize _s_col to None first to avoid AttributeError during parent init + self._s_col = None + + # Store whether x_cols was originally None to reset it later + x_cols_was_none = x_cols is None # Call parent constructor super().__init__( @@ -99,6 +102,26 @@ def __init__( force_all_d_finite=force_all_d_finite, ) + # Set selection column directly to avoid triggering checks during init + if s_col is not None: + if not isinstance(s_col, str): + raise TypeError( + "The selection variable s_col must be of str type (or None). " + f"{str(s_col)} of type {str(type(s_col))} was passed." + ) + if s_col not in self.all_variables: + raise ValueError(f"Invalid selection variable s_col. {s_col} is no data column.") + self._s_col = s_col + + # If x_cols was originally None, reset it to exclude the selection column + if x_cols_was_none and s_col is not None: + self.x_cols = None + + # Now run the checks and set variables + if s_col is not None: + self._check_disjoint_sets() + self._set_y_z_s() + # Set selection variable array after data is loaded self._set_selection_var() @@ -193,6 +216,14 @@ def from_arrays( ) @property + def s(self): + """ + Array of score or selection variable. + """ + if self.s_col is not None: + return self._s.values + else: + return None @property def s_col(self): """ The selection variable. 
@@ -201,30 +232,29 @@ def s_col(self): @s_col.setter def s_col(self, value): - if not isinstance(value, str): - raise TypeError( - "The selection variable s_col must be of str type. " f"{str(value)} of type {str(type(value))} was passed." - ) - # Check if data exists (during initialization it might not) - if hasattr(self, "_data") and value not in self.all_variables: - raise ValueError("Invalid selection variable s_col. The selection variable is no data column.") - self._s_col = value - # Update selection variable array if data is already loaded - if hasattr(self, "_data"): - self._set_selection_var() - - @property - def s(self): - """ - Array of selection variable. - """ - return self._s.values + reset_value = hasattr(self, "_s_col") + if value is not None: + if not isinstance(value, str): + raise TypeError( + "The selection variable s_col must be of str type (or None). " + f"{str(value)} of type {str(type(value))} was passed." + ) + if value not in self.all_variables: + raise ValueError(f"Invalid selection variable s_col. {value} is no data column.") + self._s_col = value + else: + self._s_col = None + if reset_value: + self._check_disjoint_sets() + self._set_y_z_s() def _get_optional_col_sets(self): """Get optional column sets including selection column.""" base_optional_col_sets = super()._get_optional_col_sets() - s_col_set = {self.s_col} - return [s_col_set] + base_optional_col_sets + if self.s_col is not None: + s_col_set = {self.s_col} + return [s_col_set] + base_optional_col_sets + return base_optional_col_sets def _check_disjoint_sets(self): """Check that selection column doesn't overlap with other variables.""" @@ -263,6 +293,17 @@ def _set_selection_var(self): if hasattr(self, "_data") and self.s_col in self.data.columns: self._s = self.data.loc[:, [self.s_col]] + def _set_y_z_s(self): + def _set_attr(col): + if col is None: + return None + assert_all_finite(self.data.loc[:, col]) + return self.data.loc[:, col] + + self._y = _set_attr(self.y_col) + self._z = _set_attr(self.z_cols) + self._s = _set_attr(self.s_col) + def __str__(self): """String representation.""" data_summary = self._data_summary_str() diff --git a/doubleml/data/tests/test_dml_data.py b/doubleml/data/tests/test_dml_data.py index 7dc94fd4..a84e4e77 100644 --- a/doubleml/data/tests/test_dml_data.py +++ b/doubleml/data/tests/test_dml_data.py @@ -67,7 +67,7 @@ def test_obj_vs_from_arrays(): dml_data_from_array = DoubleMLData.from_arrays( dml_data.data[dml_data.x_cols], dml_data.data[dml_data.y_col], dml_data.data[dml_data.d_cols] ) - assert dml_data_from_array.data.equals(dml_data.data) + assert np.array_equal(dml_data_from_array.data, dml_data.data) dml_data = _make_pliv_data(n_obs=100) dml_data_from_array = DoubleMLData.from_arrays( @@ -76,7 +76,7 @@ def test_obj_vs_from_arrays(): dml_data.data[dml_data.d_cols], dml_data.data[dml_data.z_cols], ) - assert dml_data_from_array.data.equals(dml_data.data) + assert np.array_equal(dml_data_from_array.data, dml_data.data) dml_data = make_pliv_CHS2015(n_obs=100, dim_z=5) dml_data_from_array = DoubleMLData.from_arrays( @@ -191,32 +191,22 @@ def test_x_cols_setter_defaults(): # without instrument with time df = pd.DataFrame(np.tile(np.arange(5), (4, 1)), columns=["yy", "dd", "xx1", "xx2", "tt"]) - dml_data = DoubleMLData(df, y_col="yy", d_cols="dd", t_col="tt") + dml_data = DoubleMLDIDData(df, y_col="yy", d_cols="dd", t_col="tt") assert dml_data.x_cols == ["xx1", "xx2"] # with instrument with time df = pd.DataFrame(np.tile(np.arange(6), (4, 1)), columns=["yy", "dd", "xx1", 
"xx2", "zz", "tt"]) - dml_data = DoubleMLData(df, y_col="yy", d_cols="dd", z_cols="zz", t_col="tt") + dml_data = DoubleMLDIDData(df, y_col="yy", d_cols="dd", z_cols="zz", t_col="tt") assert dml_data.x_cols == ["xx1", "xx2"] # without instrument with selection df = pd.DataFrame(np.tile(np.arange(5), (4, 1)), columns=["yy", "dd", "xx1", "xx2", "ss"]) - dml_data = DoubleMLData(df, y_col="yy", d_cols="dd", s_col="ss") + dml_data = DoubleMLSSMData(df, y_col="yy", d_cols="dd", s_col="ss") assert dml_data.x_cols == ["xx1", "xx2"] # with instrument with selection df = pd.DataFrame(np.tile(np.arange(6), (4, 1)), columns=["yy", "dd", "xx1", "xx2", "zz", "ss"]) - dml_data = DoubleMLData(df, y_col="yy", d_cols="dd", z_cols="zz", s_col="ss") - assert dml_data.x_cols == ["xx1", "xx2"] - - # with selection and time - df = pd.DataFrame(np.tile(np.arange(6), (4, 1)), columns=["yy", "dd", "xx1", "xx2", "tt", "ss"]) - dml_data = DoubleMLData(df, y_col="yy", d_cols="dd", t_col="tt", s_col="ss") - assert dml_data.x_cols == ["xx1", "xx2"] - - # with instrument, selection and time - df = pd.DataFrame(np.tile(np.arange(7), (4, 1)), columns=["yy", "dd", "xx1", "xx2", "zz", "tt", "ss"]) - dml_data = DoubleMLData(df, y_col="yy", d_cols="dd", z_cols="zz", t_col="tt", s_col="ss") + dml_data = DoubleMLSSMData(df, y_col="yy", d_cols="dd", z_cols="zz", s_col="ss") assert dml_data.x_cols == ["xx1", "xx2"] @@ -322,7 +312,7 @@ def test_t_col_setter(): np.random.seed(3141) df = make_did_SZ2020(n_obs=100, cross_sectional_data=True, return_type=pd.DataFrame) df["t_new"] = np.ones(shape=(100,)) - dml_data = DoubleMLDIDData(df, "y", "d", z_cols=[f"Z{i + 1}" for i in np.arange(4)], t_col="t") + dml_data = DoubleMLDIDData(df, "y", "d", x_cols=[f"Z{i + 1}" for i in np.arange(4)], t_col="t") # check that after changing t_col, the t array gets updated t_comp = dml_data.data["t_new"].values @@ -460,42 +450,38 @@ def test_disjoint_sets(): # time variable msg = r"At least one variable/column is set as outcome variable \(``y_col``\) and time variable \(``t_col``\)." with pytest.raises(ValueError, match=msg): - _ = DoubleMLData(df, y_col="yy", d_cols=["dd1"], x_cols=["xx1", "xx2"], t_col="yy") + _ = DoubleMLDIDData(df, y_col="yy", d_cols=["dd1"], x_cols=["xx1", "xx2"], t_col="yy") msg = r"At least one variable/column is set as treatment variable \(``d_cols``\) and time variable \(``t_col``\)." with pytest.raises(ValueError, match=msg): - _ = DoubleMLData(df, y_col="yy", d_cols=["dd1"], x_cols=["xx1", "xx2"], t_col="dd1") + _ = DoubleMLDIDData(df, y_col="yy", d_cols=["dd1"], x_cols=["xx1", "xx2"], t_col="dd1") msg = r"At least one variable/column is set as covariate \(``x_cols``\) and time variable \(``t_col``\)." with pytest.raises(ValueError, match=msg): - _ = DoubleMLData(df, y_col="yy", d_cols=["dd1"], x_cols=["xx1", "xx2"], t_col="xx2") + _ = DoubleMLDIDData(df, y_col="yy", d_cols=["dd1"], x_cols=["xx1", "xx2"], t_col="xx2") msg = r"At least one variable/column is set as instrumental variable \(``z_cols``\) and time variable \(``t_col``\)." with pytest.raises(ValueError, match=msg): - _ = DoubleMLData(df, y_col="yy", d_cols=["dd1"], x_cols=["xx1", "xx2"], z_cols="zz", t_col="zz") + _ = DoubleMLDIDData(df, y_col="yy", d_cols=["dd1"], x_cols=["xx1", "xx2"], z_cols="zz", t_col="zz") # score or selection variable msg = ( - r"At least one variable/column is set as outcome variable \(``y_col``\) and score or selection variable \(``s_col``\)." 
+ r"At least one variable/column is set as outcome variable \(``y_col``\) and selection variable \(``s_col``\)." ) with pytest.raises(ValueError, match=msg): - _ = DoubleMLData(df, y_col="yy", d_cols=["dd1"], x_cols=["xx1", "xx2"], s_col="yy") + _ = DoubleMLSSMData(df, y_col="yy", d_cols=["dd1"], x_cols=["xx1", "xx2"], s_col="yy") msg = ( r"At least one variable/column is set as treatment variable \(``d_cols``\) " - r"and score or selection variable \(``s_col``\)." + r"and selection variable \(``s_col``\)." ) with pytest.raises(ValueError, match=msg): - _ = DoubleMLData(df, y_col="yy", d_cols=["dd1"], x_cols=["xx1", "xx2"], s_col="dd1") - msg = r"At least one variable/column is set as covariate \(``x_cols``\) and score or selection variable \(``s_col``\)." + _ = DoubleMLSSMData(df, y_col="yy", d_cols=["dd1"], x_cols=["xx1", "xx2"], s_col="dd1") + msg = r"At least one variable/column is set as covariate \(``x_cols``\) and selection variable \(``s_col``\)." with pytest.raises(ValueError, match=msg): - _ = DoubleMLData(df, y_col="yy", d_cols=["dd1"], x_cols=["xx1", "xx2"], s_col="xx2") + _ = DoubleMLSSMData(df, y_col="yy", d_cols=["dd1"], x_cols=["xx1", "xx2"], s_col="xx2") msg = ( r"At least one variable/column is set as instrumental variable \(``z_cols``\) " - r"and score or selection variable \(``s_col``\)." + r"and selection variable \(``s_col``\)." ) with pytest.raises(ValueError, match=msg): - _ = DoubleMLData(df, y_col="yy", d_cols=["dd1"], x_cols=["xx1", "xx2"], z_cols="zz", s_col="zz") - msg = r"At least one variable/column is set as time variable \(``t_col``\) and score or selection variable \(``s_col``\)." - with pytest.raises(ValueError, match=msg): - _ = DoubleMLData(df, y_col="yy", d_cols=["dd1"], x_cols=["xx1", "xx2"], t_col="tt", s_col="tt") - + _ = DoubleMLSSMData(df, y_col="yy", d_cols=["dd1"], x_cols=["xx1", "xx2"], z_cols="zz", s_col="zz") @pytest.mark.ci def test_duplicates(): diff --git a/doubleml/did/datasets/dgp_did_SZ2020.py b/doubleml/did/datasets/dgp_did_SZ2020.py index 0c0f31ab..db82b032 100644 --- a/doubleml/did/datasets/dgp_did_SZ2020.py +++ b/doubleml/did/datasets/dgp_did_SZ2020.py @@ -12,6 +12,7 @@ _dml_panel_data_alias = _get_dml_panel_data_alias() + def _generate_features(n_obs, c, dim_x=4): cov_mat = toeplitz([np.power(c, k) for k in range(dim_x)]) x = np.random.multivariate_normal(np.zeros(dim_x), cov_mat, size=n_obs) @@ -182,16 +183,13 @@ def make_did_SZ2020(n_obs=500, dgp_type=1, cross_sectional_data=False, return_ty if return_type in _array_alias: return z, y, d, None - elif return_type in _data_frame_alias + _dml_did_data_alias + _dml_panel_data_alias: + elif return_type in _data_frame_alias + _dml_did_data_alias: z_cols = [f"Z{i + 1}" for i in np.arange(dim_x)] data = pd.DataFrame(np.column_stack((z, y, d)), columns=z_cols + ["y", "d"]) if return_type in _data_frame_alias: return data - elif return_type in _dml_did_data_alias: - return DoubleMLDIDData(data, "y", "d", z_cols=z_cols, t_col="t") else: - assert return_type in _dml_panel_data_alias - return DoubleMLPanelData(data, "y", "d", t_col="t", id_col="id", x_cols=z_cols) + return DoubleMLDIDData(data, "y", "d", x_cols=z_cols) elif return_type == "DoubleMLPanelData": z_cols = [f"Z{i + 1}" for i in np.arange(dim_x)] df0 = ( @@ -231,15 +229,12 @@ def make_did_SZ2020(n_obs=500, dgp_type=1, cross_sectional_data=False, return_ty if return_type in _array_alias: return z, y, d, t - elif return_type in _data_frame_alias + _dml_did_data_alias + _dml_panel_data_alias: + elif return_type in _data_frame_alias + 
_dml_did_data_alias: z_cols = [f"Z{i + 1}" for i in np.arange(dim_x)] data = pd.DataFrame(np.column_stack((z, y, d, t)), columns=z_cols + ["y", "d", "t"]) if return_type in _data_frame_alias: return data elif return_type in _dml_did_data_alias: - return DoubleMLDIDData(data, "y", "d", z_cols=z_cols, t_col="t") - else: - assert return_type in _dml_panel_data_alias - return DoubleMLPanelData(data, "y", "d", t_col="t", id_col="id", x_cols=z_cols) + return DoubleMLDIDData(data, "y", "d", x_cols=z_cols, t_col="t") else: - raise ValueError("Invalid return_type.") + raise ValueError("Invalid return_type.") \ No newline at end of file From 9f6f5d432a9c259344cab0dcc80fc19a7af5ac35 Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Thu, 5 Jun 2025 16:10:57 +0200 Subject: [PATCH 26/84] add _n_obs_sample_splitting property to doubleml class --- doubleml/double_ml.py | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index c2d3727b..58b8692a 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -98,6 +98,7 @@ def __init__(self, obj_dml_data, n_folds, n_rep, score, draw_sample_splitting): # perform sample splitting self._smpls = None self._smpls_cluster = None + self._n_obs_sample_splitting = self.n_obs if draw_sample_splitting: self.draw_sample_splitting() @@ -1200,37 +1201,30 @@ def evaluate_learners(self, learners=None, metric=_rmse): f"The learners have to be a subset of {str(self.params_names)}. Learners {str(learners)} provided." ) - def draw_sample_splitting(self, n_obs=None): + def draw_sample_splitting(self): """ Draw sample splitting for DoubleML models. The samples are drawn according to the attributes ``n_folds`` and ``n_rep``. - Parameters - ---------- - n_obs : int or None - The number of observations to resample. If ``None``, the number of observations is set to the number - of observations in the data set. - Returns ------- self : object """ - if n_obs is None: - n_obs = self.n_obs - if self._is_cluster_data: obj_dml_resampling = DoubleMLClusterResampling( n_folds=self._n_folds_per_cluster, n_rep=self.n_rep, - n_obs=n_obs, + n_obs=self._n_obs_sample_splitting, n_cluster_vars=self._dml_data.n_cluster_vars, cluster_vars=self._dml_data.cluster_vars, ) self._smpls, self._smpls_cluster = obj_dml_resampling.split_samples() else: - obj_dml_resampling = DoubleMLResampling(n_folds=self.n_folds, n_rep=self.n_rep, n_obs=n_obs, stratify=self._strata) + obj_dml_resampling = DoubleMLResampling( + n_folds=self.n_folds, n_rep=self.n_rep, n_obs=self._n_obs_sample_splitting, stratify=self._strata + ) self._smpls = obj_dml_resampling.split_samples() return self From b96a8392392e21387243698ffe5505205d667fda Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Thu, 5 Jun 2025 16:22:34 +0200 Subject: [PATCH 27/84] some progress on refactoring the data backends. 
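For orientation, a minimal usage sketch of the refactored backends (a sketch only, assuming the keyword-based constructor arguments and import paths shown in the surrounding diffs; the DataFrame and column names are illustrative):

import numpy as np
import pandas as pd
from doubleml.data import DoubleMLDIDData, DoubleMLSSMData

df = pd.DataFrame(np.tile(np.arange(6), (4, 1)), columns=["yy", "dd", "xx1", "xx2", "tt", "ss"])

# the time column t_col of the DID backend is optional and passed by keyword
did_data = DoubleMLDIDData(df, y_col="yy", d_cols="dd", x_cols=["xx1", "xx2"], t_col="tt")

# the selection column s_col of the SSM backend is likewise optional (defaults to None)
ssm_data = DoubleMLSSMData(df, y_col="yy", d_cols="dd", x_cols=["xx1", "xx2"], s_col="ss")

print(did_data.t_col, ssm_data.s_col)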
--- doubleml/data/ssm_data.py | 11 +++++++---- doubleml/data/tests/test_dml_data.py | 4 ++-- doubleml/irm/datasets/dgp_ssm_data.py | 14 +++++++------- 3 files changed, 16 insertions(+), 13 deletions(-) diff --git a/doubleml/data/ssm_data.py b/doubleml/data/ssm_data.py index 425fbde5..91c50bb0 100644 --- a/doubleml/data/ssm_data.py +++ b/doubleml/data/ssm_data.py @@ -75,9 +75,9 @@ def __init__( data, y_col, d_cols, - s_col, x_cols=None, z_cols=None, + s_col=None, cluster_cols=None, use_other_treat_as_covariate=True, force_all_x_finite=True, @@ -131,8 +131,8 @@ def from_arrays( x, y, d, - s, z=None, + s=None, cluster_vars=None, use_other_treat_as_covariate=True, force_all_x_finite=True, @@ -206,9 +206,9 @@ def from_arrays( data, base_data.y_col, base_data.d_cols, - s_col, base_data.x_cols, base_data.z_cols, + s_col, base_data.cluster_cols, base_data.use_other_treat_as_covariate, base_data.force_all_x_finite, @@ -223,7 +223,10 @@ def s(self): if self.s_col is not None: return self._s.values else: - return None @property + return None + + + @property def s_col(self): """ The selection variable. diff --git a/doubleml/data/tests/test_dml_data.py b/doubleml/data/tests/test_dml_data.py index a84e4e77..af09e89e 100644 --- a/doubleml/data/tests/test_dml_data.py +++ b/doubleml/data/tests/test_dml_data.py @@ -344,11 +344,11 @@ def test_s_col_setter(): dml_data.s_col = "s_new" assert np.array_equal(dml_data.s, s_comp) - msg = r"Invalid score or selection variable s_col. a13 is no data column." + msg = r"Invalid selection variable s_col. a13 is no data column." with pytest.raises(ValueError, match=msg): dml_data.s_col = "a13" - msg = r"The score or selection variable s_col must be of str type \(or None\). " "5 of type was passed." + msg = r"The selection variable s_col must be of str type \(or None\). " "5 of type was passed." with pytest.raises(TypeError, match=msg): dml_data.s_col = 5 diff --git a/doubleml/irm/datasets/dgp_ssm_data.py b/doubleml/irm/datasets/dgp_ssm_data.py index 6a6a5bee..51a33c3a 100644 --- a/doubleml/irm/datasets/dgp_ssm_data.py +++ b/doubleml/irm/datasets/dgp_ssm_data.py @@ -2,15 +2,15 @@ import pandas as pd from scipy.linalg import toeplitz -from doubleml.data import DoubleMLData -from doubleml.utils._aliases import _get_array_alias, _get_data_frame_alias, _get_dml_data_alias +from doubleml.data import DoubleMLSSMData +from doubleml.utils._aliases import _get_array_alias, _get_data_frame_alias, _get_dml_ssm_data_alias _array_alias = _get_array_alias() _data_frame_alias = _get_data_frame_alias() -_dml_data_alias = _get_dml_data_alias() +_dml_ssm_data_alias = _get_dml_ssm_data_alias() -def make_ssm_data(n_obs=8000, dim_x=100, theta=1, mar=True, return_type="DoubleMLData"): +def make_ssm_data(n_obs=8000, dim_x=100, theta=1, mar=True, return_type="DoubleMLSSMData"): """ Generates data from a sample selection model (SSM). 
The data generating process is defined as @@ -86,7 +86,7 @@ def make_ssm_data(n_obs=8000, dim_x=100, theta=1, mar=True, return_type="DoubleM if return_type in _array_alias: return x, y, d, z, s - elif return_type in _data_frame_alias + _dml_data_alias: + elif return_type in _data_frame_alias + _dml_ssm_data_alias: x_cols = [f"X{i + 1}" for i in np.arange(dim_x)] if mar: data = pd.DataFrame(np.column_stack((x, y, d, s)), columns=x_cols + ["y", "d", "s"]) @@ -96,7 +96,7 @@ def make_ssm_data(n_obs=8000, dim_x=100, theta=1, mar=True, return_type="DoubleM return data else: if mar: - return DoubleMLData(data, "y", "d", x_cols, None, None, "s") - return DoubleMLData(data, "y", "d", x_cols, "z", None, "s") + return DoubleMLSSMData(data, "y", "d", x_cols, z_cols=None, s_col="s") + return DoubleMLSSMData(data, "y", "d", x_cols, z_cols="z", s_col="s") else: raise ValueError("Invalid return_type.") From eb951c40ba6c6ed58854a1f6bede79411e2c7efb Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Thu, 5 Jun 2025 16:49:51 +0200 Subject: [PATCH 28/84] update check_resampling input --- doubleml/double_ml.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index 58b8692a..d2a7a641 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -1290,7 +1290,7 @@ def set_sample_splitting(self, all_smpls, all_smpls_cluster=None): >>> dml_plr_obj.set_sample_splitting(smpls) """ self._smpls, self._smpls_cluster, self._n_rep, self._n_folds = _check_sample_splitting( - all_smpls, all_smpls_cluster, self._dml_data, self._is_cluster_data, n_obs=self.n_obs + all_smpls, all_smpls_cluster, self._dml_data, self._is_cluster_data, n_obs=self._n_obs_sample_splitting ) # set sample splitting can update the number of repetitions From a6c6507fabe396ac084c1d9825b2fdf6a7850e33 Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Thu, 5 Jun 2025 16:50:01 +0200 Subject: [PATCH 29/84] update did binary classes with n_obs_subset and n_obs_sample_splitting --- doubleml/did/did_binary.py | 12 ++++++------ doubleml/did/did_cs_binary.py | 19 +++++-------------- .../tests/test_did_binary_control_groups.py | 2 +- .../test_did_binary_external_predictions.py | 2 +- .../did/tests/test_did_binary_vs_did_panel.py | 2 +- 5 files changed, 14 insertions(+), 23 deletions(-) diff --git a/doubleml/did/did_binary.py b/doubleml/did/did_binary.py index e4d309db..a4876f74 100644 --- a/doubleml/did/did_binary.py +++ b/doubleml/did/did_binary.py @@ -171,8 +171,7 @@ def __init__( # Numeric values for positions of the entries in id_panel_data inside id_original # np.nonzero(np.isin(id_original, id_panel_data)) - self._n_subset = self._panel_data_wide.shape[0] - self._n_obs = self._n_subset # Effective sample size used for resampling + self._n_obs_subset = self._panel_data_wide.shape[0] # Effective sample size used for resampling self._n_treated_subset = self._panel_data_wide["G_indicator"].sum() # Save x and y for later ML estimation @@ -192,6 +191,7 @@ def __init__( # set stratication for resampling self._strata = self._panel_data_wide["G_indicator"] + self._n_obs_sample_splitting = self.n_obs_subset if draw_sample_splitting: self.draw_sample_splitting() @@ -244,7 +244,7 @@ def __str__(self): f"Evaluation period: {str(self.t_value_eval)}\n" f"Control group: {str(self.control_group)}\n" f"Anticipation periods: {str(self.anticipation_periods)}\n" - f"Effective sample size: {str(self.n_obs)}\n" + f"Effective sample size: {str(self.n_obs_subset)}\n" ) learner_info = "" for key, value in 
self.learner.items(): @@ -371,11 +371,11 @@ def trimming_threshold(self): return self._trimming_threshold @property - def n_obs(self): + def n_obs_subset(self): """ The number of observations used for estimation. """ - return self._n_subset + return self._n_obs_subset def _initialize_ml_nuisance_params(self): if self.score == "observational": @@ -713,7 +713,7 @@ def _sensitivity_element_est(self, preds): } # add scaling to make variance estimation consistent (sample size difference) - scaling = self._dml_data.n_obs / self._n_subset + scaling = self._dml_data.n_obs / self._n_obs_subset element_dict = { "sigma2": sigma2, "nu2": nu2, diff --git a/doubleml/did/did_cs_binary.py b/doubleml/did/did_cs_binary.py index e550eb60..d571e107 100644 --- a/doubleml/did/did_cs_binary.py +++ b/doubleml/did/did_cs_binary.py @@ -53,16 +53,7 @@ def __init__( self._n_obs = obj_dml_data.data.shape[0] self._score_dim = (self._n_obs, self.n_rep, self._dml_data.n_treat) # reinitialze arrays - ( - self._psi, - self._psi_deriv, - self._psi_elements, - self._var_scaling_factors, - self._coef, - self._se, - self._all_coef, - self._all_se, - ) = self._initialize_arrays() + self._initialize_arrays() self._check_data(self._dml_data) g_values = self._dml_data.g_values @@ -108,8 +99,7 @@ def __init__( # Numeric values for positions of the entries in id_panel_data inside id_original # np.nonzero(np.isin(id_original, id_panel_data)) - self._n_subset = self.data_subset.shape[0] - self._n_obs = self._n_subset # Effective sample size used for resampling + self._n_obs_subset = self.data_subset.shape[0] # Effective sample size used for resampling # Save x and y for later ML estimation self._x_data = self.data_subset.loc[:, self._dml_data.x_cols].values @@ -129,6 +119,7 @@ def __init__( # set stratication for resampling self._strata = self.data_subset["G_indicator"] + 2 * self.data_subset["t_indicator"] + self._n_obs_sample_splitting = self.n_obs_subset if draw_sample_splitting: self.draw_sample_splitting() @@ -255,11 +246,11 @@ def trimming_threshold(self): return self._trimming_threshold @property - def n_obs(self): + def n_obs_subset(self): """ The number of observations used for estimation. 
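To illustrate the distinction this patch introduces (a sketch only, assuming the DoubleMLPanelData API shown in the diffs below): n_obs now counts the rows of the long-format data, while the new n_ids property counts unique units.

from doubleml.data import DoubleMLPanelData
from doubleml.did.datasets import make_did_CS2021

df = make_did_CS2021(n_obs=100, dgp_type=1, n_pre_treat_periods=2, n_periods=5, time_type="float")
dml_data = DoubleMLPanelData(df, y_col="y", d_cols="d", id_col="id", t_col="t", x_cols=["Z1", "Z2", "Z3", "Z4"])

# rows of the long-format data (units x periods) vs. number of unique unit ids
print(dml_data.n_obs, dml_data.n_ids)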
""" - return self._n_subset + return self._n_obs_subset def _initialize_ml_nuisance_params(self): if self.score == "observational": diff --git a/doubleml/did/tests/test_did_binary_control_groups.py b/doubleml/did/tests/test_did_binary_control_groups.py index b8406b15..627cf50a 100644 --- a/doubleml/did/tests/test_did_binary_control_groups.py +++ b/doubleml/did/tests/test_did_binary_control_groups.py @@ -21,7 +21,7 @@ def test_control_groups_different(): dml_did_never_treated = dml.did.DoubleMLDIDBinary(control_group="never_treated", **args) dml_did_not_yet_treated = dml.did.DoubleMLDIDBinary(control_group="not_yet_treated", **args) - assert dml_did_never_treated._n_subset != dml_did_not_yet_treated._n_subset + assert dml_did_never_treated.n_obs_subset != dml_did_not_yet_treated.n_obs_subset # same treatment group assert dml_did_never_treated._n_treated_subset == dml_did_not_yet_treated._n_treated_subset diff --git a/doubleml/did/tests/test_did_binary_external_predictions.py b/doubleml/did/tests/test_did_binary_external_predictions.py index ccc136d0..0cb3e055 100644 --- a/doubleml/did/tests/test_did_binary_external_predictions.py +++ b/doubleml/did/tests/test_did_binary_external_predictions.py @@ -112,7 +112,7 @@ def doubleml_did_panel_fixture(did_score, n_rep): } dml_did = DoubleMLDIDBinary(ml_g=LinearRegression(), ml_m=LogisticRegression(), **kwargs) - all_smpls = draw_smpls(n_obs=dml_did._n_subset, n_folds=n_folds, n_rep=n_rep, groups=dml_did._g_panel) + all_smpls = draw_smpls(n_obs=dml_did.n_obs_subset, n_folds=n_folds, n_rep=n_rep, groups=dml_did._g_panel) dml_did.set_sample_splitting(all_smpls) np.random.seed(3141) diff --git a/doubleml/did/tests/test_did_binary_vs_did_panel.py b/doubleml/did/tests/test_did_binary_vs_did_panel.py index 1eacdf6a..7d1dc947 100644 --- a/doubleml/did/tests/test_did_binary_vs_did_panel.py +++ b/doubleml/did/tests/test_did_binary_vs_did_panel.py @@ -178,7 +178,7 @@ def test_sensitivity_elements(dml_did_binary_vs_did_fixture): ) for sensitivity_element in ["psi_sigma2", "psi_nu2", "riesz_rep"]: dml_binary_obj = dml_did_binary_vs_did_fixture["dml_did_binary_obj"] - scaling = dml_binary_obj._n_subset / dml_binary_obj._dml_data.n_obs + scaling = dml_binary_obj.n_obs_subset / dml_binary_obj._dml_data.n_obs binary_sensitivity_element = scaling * _get_id_positions( dml_did_binary_vs_did_fixture["sensitivity_elements_binary"][sensitivity_element], dml_binary_obj._id_positions ) From d54b272235261a090792990b180c1b74b4e861da Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Fri, 6 Jun 2025 09:19:43 +0200 Subject: [PATCH 30/84] update tune without folds to n_obs of doubleml obj --- doubleml/double_ml.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index d2a7a641..88f677ef 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -848,7 +848,7 @@ def tune( self.set_ml_nuisance_params(nuisance_model, self._dml_data.d_cols[i_d], params) else: - smpls = [(np.arange(self._dml_data.n_obs), np.arange(self._dml_data.n_obs))] + smpls = [(np.arange(self.n_obs), np.arange(self.n_obs))] # tune hyperparameters res = self._nuisance_tuning( smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search From 693e109bd65d6cb0987c9de2363266cf48c61d32 Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Fri, 6 Jun 2025 09:20:31 +0200 Subject: [PATCH 31/84] change n_obs for panel data add n_ids for did_binary obj --- doubleml/data/panel_data.py | 4 +-- 
doubleml/data/tests/test_panel_data.py | 5 ++-- doubleml/did/did_binary.py | 12 ++++++-- doubleml/did/did_cs_binary.py | 5 ---- .../did/tests/test_did_binary_vs_did_panel.py | 2 +- .../test_did_multi_external_predictions.py | 7 +++++ .../did/tests/test_did_multi_return_types.py | 5 ++-- doubleml/did/tests/test_return_types.py | 13 ++++++-- doubleml/utils/_check_return_types.py | 30 ++++++------------- 9 files changed, 45 insertions(+), 38 deletions(-) diff --git a/doubleml/data/panel_data.py b/doubleml/data/panel_data.py index 4e416183..59ad531c 100644 --- a/doubleml/data/panel_data.py +++ b/doubleml/data/panel_data.py @@ -215,9 +215,9 @@ def id_var_unique(self): return self._id_var_unique @property - def n_obs(self): + def n_ids(self): """ - The number of observations. For panel data, the number of unique values for id_col. + The number of unique values for id_col. """ return len(self._id_var_unique) diff --git a/doubleml/data/tests/test_panel_data.py b/doubleml/data/tests/test_panel_data.py index 2f2250ba..e1a7c925 100644 --- a/doubleml/data/tests/test_panel_data.py +++ b/doubleml/data/tests/test_panel_data.py @@ -56,7 +56,7 @@ def test_id_col_setter(): dml_data.id_col = "id_new" assert np.array_equal(dml_data.id_var, id_comp) assert dml_data._id_var_unique == np.unique(id_comp) - assert dml_data.n_obs == 1 + assert dml_data.n_ids == 1 msg = "Invalid id variable id_col. a13 is no data column." with pytest.raises(ValueError, match=msg): @@ -169,7 +169,8 @@ def test_panel_data_properties(): assert np.array_equal(dml_data.id_var, df["id"].values) assert np.array_equal(dml_data.id_var_unique, np.unique(df["id"].values)) - assert dml_data.n_obs == len(np.unique(df["id"].values)) + assert dml_data.n_obs == df.shape[0] + assert dml_data.n_ids == len(np.unique(df["id"].values)) assert dml_data.g_col == "d" assert np.array_equal(dml_data.g_values, np.sort(np.unique(df["d"].values))) assert dml_data.n_groups == len(np.unique(df["d"].values)) diff --git a/doubleml/did/did_binary.py b/doubleml/did/did_binary.py index a4876f74..a9939c97 100644 --- a/doubleml/did/did_binary.py +++ b/doubleml/did/did_binary.py @@ -124,6 +124,12 @@ def __init__( super().__init__(obj_dml_data, n_folds, n_rep, score, draw_sample_splitting=False) self._check_data(self._dml_data) + # for did panel data the scores are based on the number of unique ids + self._n_obs = obj_dml_data.n_ids + self._score_dim = (self._n_obs, self.n_rep, self._dml_data.n_treat) + # reinitialze arrays + self._initialize_arrays() + g_values = self._dml_data.g_values t_values = self._dml_data.t_values @@ -542,7 +548,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa psi_a, psi_b = self._score_elements(y, d, g_hat0["preds"], g_hat1["preds"], m_hat["preds"], p_hat) extend_kwargs = { - "n_obs": self._dml_data.n_obs, + "n_obs": self._dml_data.n_ids, "id_positions": self.id_positions, } psi_elements = { @@ -707,13 +713,13 @@ def _sensitivity_element_est(self, preds): psi_nu2 = nu2_score_element - nu2 extend_kwargs = { - "n_obs": self._dml_data.n_obs, + "n_obs": self._dml_data.n_ids, "id_positions": self.id_positions, "fill_value": 0.0, } # add scaling to make variance estimation consistent (sample size difference) - scaling = self._dml_data.n_obs / self._n_obs_subset + scaling = self._dml_data.n_ids / self._n_obs_subset element_dict = { "sigma2": sigma2, "nu2": nu2, diff --git a/doubleml/did/did_cs_binary.py b/doubleml/did/did_cs_binary.py index d571e107..e1786242 100644 --- a/doubleml/did/did_cs_binary.py +++ 
b/doubleml/did/did_cs_binary.py @@ -50,11 +50,6 @@ def __init__( ): super().__init__(obj_dml_data, n_folds, n_rep, score, draw_sample_splitting=False) - self._n_obs = obj_dml_data.data.shape[0] - self._score_dim = (self._n_obs, self.n_rep, self._dml_data.n_treat) - # reinitialze arrays - self._initialize_arrays() - self._check_data(self._dml_data) g_values = self._dml_data.g_values t_values = self._dml_data.t_values diff --git a/doubleml/did/tests/test_did_binary_vs_did_panel.py b/doubleml/did/tests/test_did_binary_vs_did_panel.py index 7d1dc947..9da81739 100644 --- a/doubleml/did/tests/test_did_binary_vs_did_panel.py +++ b/doubleml/did/tests/test_did_binary_vs_did_panel.py @@ -178,7 +178,7 @@ def test_sensitivity_elements(dml_did_binary_vs_did_fixture): ) for sensitivity_element in ["psi_sigma2", "psi_nu2", "riesz_rep"]: dml_binary_obj = dml_did_binary_vs_did_fixture["dml_did_binary_obj"] - scaling = dml_binary_obj.n_obs_subset / dml_binary_obj._dml_data.n_obs + scaling = dml_binary_obj.n_obs_subset / dml_binary_obj._dml_data.n_ids binary_sensitivity_element = scaling * _get_id_positions( dml_did_binary_vs_did_fixture["sensitivity_elements_binary"][sensitivity_element], dml_binary_obj._id_positions ) diff --git a/doubleml/did/tests/test_did_multi_external_predictions.py b/doubleml/did/tests/test_did_multi_external_predictions.py index 2e7003f9..e336487d 100644 --- a/doubleml/did/tests/test_did_multi_external_predictions.py +++ b/doubleml/did/tests/test_did_multi_external_predictions.py @@ -100,3 +100,10 @@ def test_coef(doubleml_did_multi_ext_fixture): assert math.isclose( doubleml_did_multi_ext_fixture["coef"], doubleml_did_multi_ext_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-3 ) + + +@pytest.mark.ci +def test_se(doubleml_did_multi_ext_fixture): + assert math.isclose( + doubleml_did_multi_ext_fixture["se"], doubleml_did_multi_ext_fixture["se_ext"], rel_tol=1e-9, abs_tol=1e-3 + ) diff --git a/doubleml/did/tests/test_did_multi_return_types.py b/doubleml/did/tests/test_did_multi_return_types.py index 2e12ce10..c11544ed 100644 --- a/doubleml/did/tests/test_did_multi_return_types.py +++ b/doubleml/did/tests/test_did_multi_return_types.py @@ -17,6 +17,7 @@ N_REP = 1 N_FOLDS = 3 N_REP_BOOT = 314 +N_PERIODS = 5 dml_args = { "n_rep": N_REP, @@ -30,7 +31,7 @@ datasets = {} # panel data -df_panel = make_did_CS2021(n_obs=N_OBS, dgp_type=1, n_pre_treat_periods=2, n_periods=5, time_type="float") +df_panel = make_did_CS2021(n_obs=N_OBS, dgp_type=1, n_pre_treat_periods=2, n_periods=N_PERIODS, time_type="float") df_panel["y_binary"] = np.random.binomial(n=1, p=0.5, size=df_panel.shape[0]) datasets["did_panel"] = DoubleMLPanelData( df_panel, y_col="y", d_cols="d", id_col="id", t_col="t", x_cols=["Z1", "Z2", "Z3", "Z4"] @@ -89,7 +90,7 @@ def test_panel_property_types_and_shapes(fitted_dml_obj): assert dml_obj.n_gt_atts == n_treat assert dml_obj.n_rep == N_REP assert dml_obj.n_folds == N_FOLDS - assert dml_obj._dml_data.n_obs == N_OBS + assert dml_obj._dml_data.n_obs == N_OBS * N_PERIODS assert dml_obj.n_rep_boot == N_REP_BOOT assert isinstance(dml_obj.all_coef, np.ndarray) diff --git a/doubleml/did/tests/test_return_types.py b/doubleml/did/tests/test_return_types.py index a59cec6c..1b6fa736 100644 --- a/doubleml/did/tests/test_return_types.py +++ b/doubleml/did/tests/test_return_types.py @@ -79,7 +79,8 @@ def test_sensitivity_return_types(fitted_dml_obj): # panel data -df_panel = make_did_CS2021(n_obs=N_OBS, dgp_type=1, n_pre_treat_periods=2, n_periods=5, time_type="float") +N_PERIODS = 5 +df_panel = 
make_did_CS2021(n_obs=N_OBS, dgp_type=1, n_pre_treat_periods=2, n_periods=N_PERIODS, time_type="float") df_panel["y_binary"] = np.random.binomial(n=1, p=0.5, size=df_panel.shape[0]) datasets["did_panel"] = DoubleMLPanelData( df_panel, y_col="y", d_cols="d", id_col="id", t_col="t", x_cols=["Z1", "Z2", "Z3", "Z4"] @@ -160,7 +161,15 @@ def fitted_panel_dml_obj(request): @pytest.mark.ci def test_panel_property_types_and_shapes(fitted_panel_dml_obj): - check_basic_property_types_and_shapes(fitted_panel_dml_obj, N_OBS, N_TREAT, N_REP, N_FOLDS, N_REP_BOOT) + check_basic_property_types_and_shapes( + fitted_panel_dml_obj, + n_obs=N_PERIODS * N_OBS, + n_treat=N_TREAT, + n_rep=N_REP, + n_folds=N_FOLDS, + n_rep_boot=N_REP_BOOT, + score_dim=(N_OBS, N_REP, N_TREAT), + ) check_basic_predictions_and_targets(fitted_panel_dml_obj, N_OBS, N_TREAT, N_REP) diff --git a/doubleml/utils/_check_return_types.py b/doubleml/utils/_check_return_types.py index 54462059..54e72833 100644 --- a/doubleml/utils/_check_return_types.py +++ b/doubleml/utils/_check_return_types.py @@ -31,10 +31,14 @@ def check_basic_return_types(dml_obj, cls): assert isinstance(dml_obj._dml_data.__str__(), str) -def check_basic_property_types_and_shapes(dml_obj, n_obs, n_treat, n_rep, n_folds, n_rep_boot): +def check_basic_property_types_and_shapes(dml_obj, n_obs, n_treat, n_rep, n_folds, n_rep_boot, score_dim=None): # not checked: learner, learner_names, params, params_names, score # already checked: summary + # use default combination + if score_dim is None: + score_dim = (n_obs, n_rep, n_treat) + # check that the setting is still in line with the hard-coded values assert dml_obj._dml_data.n_treat == n_treat assert dml_obj.n_rep == n_rep @@ -55,35 +59,19 @@ def check_basic_property_types_and_shapes(dml_obj, n_obs, n_treat, n_rep, n_fold assert dml_obj.coef.shape == (n_treat,) assert isinstance(dml_obj.psi, np.ndarray) - assert dml_obj.psi.shape == ( - n_obs, - n_rep, - n_treat, - ) + assert dml_obj.psi.shape == score_dim is_nonlinear = isinstance(dml_obj, NonLinearScoreMixin) if is_nonlinear: for score_element in dml_obj._score_element_names: assert isinstance(dml_obj.psi_elements[score_element], np.ndarray) - assert dml_obj.psi_elements[score_element].shape == ( - n_obs, - n_rep, - n_treat, - ) + assert dml_obj.psi_elements[score_element].shape == score_dim else: assert isinstance(dml_obj.psi_elements["psi_a"], np.ndarray) - assert dml_obj.psi_elements["psi_a"].shape == ( - n_obs, - n_rep, - n_treat, - ) + assert dml_obj.psi_elements["psi_a"].shape == score_dim assert isinstance(dml_obj.psi_elements["psi_b"], np.ndarray) - assert dml_obj.psi_elements["psi_b"].shape == ( - n_obs, - n_rep, - n_treat, - ) + assert dml_obj.psi_elements["psi_b"].shape == score_dim assert isinstance(dml_obj.framework, DoubleMLFramework) assert isinstance(dml_obj.pval, np.ndarray) From 16624d5677cbf95dad815d6536b54e547bd0db05 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Fri, 6 Jun 2025 11:10:57 +0200 Subject: [PATCH 32/84] fix docstr --- doubleml/data/did_data.py | 3 ++- doubleml/data/rdd_data.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/doubleml/data/did_data.py b/doubleml/data/did_data.py index c7909b4e..fd4fc7de 100644 --- a/doubleml/data/did_data.py +++ b/doubleml/data/did_data.py @@ -181,7 +181,8 @@ def from_arrays( Default is ``True``. 
Examples - -------- >>> from doubleml import DoubleMLDIDData + -------- + >>> from doubleml import DoubleMLDIDData >>> from doubleml.did.datasets import make_did_SZ2020 >>> (x, y, d, t) = make_did_SZ2020(return_type='array') >>> obj_dml_data_from_array = DoubleMLDIDData.from_arrays(x, y, d, t=t) diff --git a/doubleml/data/rdd_data.py b/doubleml/data/rdd_data.py index ac0fff67..f19a4fa0 100644 --- a/doubleml/data/rdd_data.py +++ b/doubleml/data/rdd_data.py @@ -59,7 +59,8 @@ class DoubleMLRDDData(DoubleMLData): Default is ``True``. Examples - -------- >>> from doubleml import DoubleMLRDDData + -------- + >>> from doubleml import DoubleMLRDDData >>> from doubleml.rdd.datasets import make_rdd_data >>> # initialization from pandas.DataFrame >>> df = make_rdd_data(return_type='DataFrame') From 7d6ef350f5116241a84017e49ae5e9dd59f56895 Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Fri, 6 Jun 2025 12:18:04 +0200 Subject: [PATCH 33/84] fix order test --- doubleml/did/did_cs_binary.py | 2 +- ...test_did_cs_binary_vs_did_cs_two_period.py | 47 ++++++++++++++++++- 2 files changed, 47 insertions(+), 2 deletions(-) diff --git a/doubleml/did/did_cs_binary.py b/doubleml/did/did_cs_binary.py index e1786242..5d6e3638 100644 --- a/doubleml/did/did_cs_binary.py +++ b/doubleml/did/did_cs_binary.py @@ -462,7 +462,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa ) extend_kwargs = { - "n_obs": self._dml_data.data.shape[0], + "n_obs": self._dml_data.n_obs, "id_positions": self.id_positions, } psi_elements = { diff --git a/doubleml/did/tests/test_did_cs_binary_vs_did_cs_two_period.py b/doubleml/did/tests/test_did_cs_binary_vs_did_cs_two_period.py index 2c8c34f3..a0a25718 100644 --- a/doubleml/did/tests/test_did_cs_binary_vs_did_cs_two_period.py +++ b/doubleml/did/tests/test_did_cs_binary_vs_did_cs_two_period.py @@ -51,10 +51,14 @@ def dml_did_cs_binary_vs_did_cs_fixture(generate_data_did_binary, learner, score # collect data dml_panel_data = generate_data_did_binary df = dml_panel_data._data.sort_values(by=["id", "t"]) + # Reorder data before to make both approaches compatible + dml_panel_data = dml.data.DoubleMLPanelData( + df, y_col="y", d_cols="d", id_col="id", t_col="t", x_cols=["Z1", "Z2", "Z3", "Z4"] + ) + obj_dml_data = dml.DoubleMLData(df, y_col="y", d_cols="d", t_col="t", x_cols=["Z1", "Z2", "Z3", "Z4"]) n_obs = df.shape[0] all_smpls = draw_smpls(n_obs, n_folds) - obj_dml_data = dml.DoubleMLData(df, y_col="y", d_cols="d", t_col="t", x_cols=["Z1", "Z2", "Z3", "Z4"]) # Set machine learning methods for m & g ml_g = clone(learner[0]) @@ -161,3 +165,44 @@ def test_coefs(dml_did_cs_binary_vs_did_cs_fixture): rel_tol=1e-9, abs_tol=1e-4, ) + + +@pytest.mark.ci +def test_ses(dml_did_cs_binary_vs_did_cs_fixture): + assert math.isclose( + dml_did_cs_binary_vs_did_cs_fixture["se"][0], + dml_did_cs_binary_vs_did_cs_fixture["se_manual"], + rel_tol=1e-9, + abs_tol=1e-4, + ) + assert math.isclose( + dml_did_cs_binary_vs_did_cs_fixture["se_binary"][0], + dml_did_cs_binary_vs_did_cs_fixture["se"][0], + rel_tol=1e-9, + abs_tol=1e-4, + ) + + +@pytest.mark.ci +def test_boot(dml_did_cs_binary_vs_did_cs_fixture): + for bootstrap in dml_did_cs_binary_vs_did_cs_fixture["boot_methods"]: + assert np.allclose( + dml_did_cs_binary_vs_did_cs_fixture["boot_t_stat" + bootstrap], + dml_did_cs_binary_vs_did_cs_fixture["boot_t_stat" + bootstrap + "_manual"], + atol=1e-4, + ) + assert np.allclose( + dml_did_cs_binary_vs_did_cs_fixture["boot_t_stat" + bootstrap], + 
dml_did_cs_binary_vs_did_cs_fixture["boot_t_stat" + bootstrap + "_binary"], + atol=1e-4, + ) + + +@pytest.mark.ci +def test_nuisance_loss(dml_did_cs_binary_vs_did_cs_fixture): + assert ( + dml_did_cs_binary_vs_did_cs_fixture["nuisance_loss"].keys() + == dml_did_cs_binary_vs_did_cs_fixture["nuisance_loss_binary"].keys() + ) + for key, value in dml_did_cs_binary_vs_did_cs_fixture["nuisance_loss"].items(): + assert np.allclose(value, dml_did_cs_binary_vs_did_cs_fixture["nuisance_loss_binary"][key], rtol=1e-9, atol=1e-3) From 18c38445220a723dc6935fa3c9f788aea4e54e48 Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Fri, 6 Jun 2025 12:30:57 +0200 Subject: [PATCH 34/84] add sensitivity estimation to did_cs_binary --- doubleml/did/did_cs_binary.py | 101 +++++++++++++++++- ...test_did_cs_binary_vs_did_cs_two_period.py | 76 ++++++++++++- 2 files changed, 174 insertions(+), 3 deletions(-) diff --git a/doubleml/did/did_cs_binary.py b/doubleml/did/did_cs_binary.py index 5d6e3638..479cba93 100644 --- a/doubleml/did/did_cs_binary.py +++ b/doubleml/did/did_cs_binary.py @@ -153,7 +153,7 @@ def __init__( self._trimming_threshold = trimming_threshold _check_trimming(self._trimming_rule, self._trimming_threshold) - self._sensitivity_implemented = False + self._sensitivity_implemented = True self._external_predictions_implemented = True @property @@ -589,4 +589,101 @@ def _nuisance_tuning( pass def _sensitivity_element_est(self, preds): - pass + y = self._y_data + d = self._g_data + t = self._t_data + + m_hat = _get_id_positions(preds["predictions"]["ml_m"], self.id_positions) + g_hat_d0_t0 = _get_id_positions(preds["predictions"]["ml_g_d0_t0"], self.id_positions) + g_hat_d0_t1 = _get_id_positions(preds["predictions"]["ml_g_d0_t1"], self.id_positions) + g_hat_d1_t0 = _get_id_positions(preds["predictions"]["ml_g_d1_t0"], self.id_positions) + g_hat_d1_t1 = _get_id_positions(preds["predictions"]["ml_g_d1_t1"], self.id_positions) + + d0t0 = np.multiply(1.0 - d, 1.0 - t) + d0t1 = np.multiply(1.0 - d, t) + d1t0 = np.multiply(d, 1.0 - t) + d1t1 = np.multiply(d, t) + + g_hat = ( + np.multiply(d0t0, g_hat_d0_t0) + + np.multiply(d0t1, g_hat_d0_t1) + + np.multiply(d1t0, g_hat_d1_t0) + + np.multiply(d1t1, g_hat_d1_t1) + ) + sigma2_score_element = np.square(y - g_hat) + sigma2 = np.mean(sigma2_score_element) + psi_sigma2 = sigma2_score_element - sigma2 + + # calc m(W,alpha) and Riesz representer + p_hat = np.mean(d) + lambda_hat = np.mean(t) + if self.score == "observational": + propensity_weight_d0 = np.divide(m_hat, 1.0 - m_hat) + if self.in_sample_normalization: + weight_d0t1 = np.multiply(d0t1, propensity_weight_d0) + weight_d0t0 = np.multiply(d0t0, propensity_weight_d0) + mean_weight_d0t1 = np.mean(weight_d0t1) + mean_weight_d0t0 = np.mean(weight_d0t0) + + m_alpha = np.multiply( + np.divide(d, p_hat), + np.divide(1.0, np.mean(d1t1)) + + np.divide(1.0, np.mean(d1t0)) + + np.divide(propensity_weight_d0, mean_weight_d0t1) + + np.divide(propensity_weight_d0, mean_weight_d0t0), + ) + + rr = ( + np.divide(d1t1, np.mean(d1t1)) + - np.divide(d1t0, np.mean(d1t0)) + - np.divide(weight_d0t1, mean_weight_d0t1) + + np.divide(weight_d0t0, mean_weight_d0t0) + ) + else: + m_alpha_1 = np.divide(1.0, lambda_hat) + np.divide(1.0, 1.0 - lambda_hat) + m_alpha = np.multiply(np.divide(d, np.square(p_hat)), np.multiply(m_alpha_1, 1.0 + propensity_weight_d0)) + + rr_1 = np.divide(t, np.multiply(p_hat, lambda_hat)) + np.divide(1.0 - t, np.multiply(p_hat, 1.0 - lambda_hat)) + rr_2 = d + np.multiply(1.0 - d, propensity_weight_d0) + rr = 
np.multiply(rr_1, rr_2) + else: + assert self.score == "experimental" + if self.in_sample_normalization: + m_alpha = ( + np.divide(1.0, np.mean(d1t1)) + + np.divide(1.0, np.mean(d1t0)) + + np.divide(1.0, np.mean(d0t1)) + + np.divide(1.0, np.mean(d0t0)) + ) + rr = ( + np.divide(d1t1, np.mean(d1t1)) + - np.divide(d1t0, np.mean(d1t0)) + - np.divide(d0t1, np.mean(d0t1)) + + np.divide(d0t0, np.mean(d0t0)) + ) + else: + m_alpha = ( + np.divide(1.0, np.multiply(p_hat, lambda_hat)) + + np.divide(1.0, np.multiply(p_hat, 1.0 - lambda_hat)) + + np.divide(1.0, np.multiply(1.0 - p_hat, lambda_hat)) + + np.divide(1.0, np.multiply(1.0 - p_hat, 1.0 - lambda_hat)) + ) + rr = ( + np.divide(d1t1, np.multiply(p_hat, lambda_hat)) + - np.divide(d1t0, np.multiply(p_hat, 1.0 - lambda_hat)) + - np.divide(d0t1, np.multiply(1.0 - p_hat, lambda_hat)) + + np.divide(d0t0, np.multiply(1.0 - p_hat, 1.0 - lambda_hat)) + ) + + nu2_score_element = np.multiply(2.0, m_alpha) - np.square(rr) + nu2 = np.mean(nu2_score_element) + psi_nu2 = nu2_score_element - nu2 + + element_dict = { + "sigma2": sigma2, + "nu2": nu2, + "psi_sigma2": psi_sigma2, + "psi_nu2": psi_nu2, + "riesz_rep": rr, + } + return element_dict diff --git a/doubleml/did/tests/test_did_cs_binary_vs_did_cs_two_period.py b/doubleml/did/tests/test_did_cs_binary_vs_did_cs_two_period.py index a0a25718..73e6b827 100644 --- a/doubleml/did/tests/test_did_cs_binary_vs_did_cs_two_period.py +++ b/doubleml/did/tests/test_did_cs_binary_vs_did_cs_two_period.py @@ -9,7 +9,7 @@ import doubleml as dml from ...tests._utils import draw_smpls -from ._utils_did_cs_manual import fit_did_cs +from ._utils_did_cs_manual import fit_did_cs, fit_sensitivity_elements_did_cs from ._utils_did_manual import boot_did @@ -148,6 +148,30 @@ def dml_did_cs_binary_vs_did_cs_fixture(generate_data_did_binary, learner, score res_dict["boot_t_stat" + bootstrap + "_binary"] = dml_did_binary_obj.boot_t_stat res_dict["boot_t_stat" + bootstrap + "_manual"] = boot_t_stat.reshape(-1, 1, 1) + # sensitivity tests + res_dict["sensitivity_elements"] = dml_did_obj.sensitivity_elements + res_dict["sensitivity_elements_binary"] = dml_did_binary_obj.sensitivity_elements + res_dict["sensitivity_elements_manual"] = fit_sensitivity_elements_did_cs( + y, + d, + t, + all_coef=dml_did_obj.all_coef, + predictions=dml_did_obj.predictions, + score=score, + in_sample_normalization=in_sample_normalization, + n_rep=1, + ) + + # sensitivity tests + res_dict["sensitivity_elements"] = dml_did_obj.sensitivity_elements + res_dict["sensitivity_elements_binary"] = dml_did_binary_obj.sensitivity_elements + + dml_did_obj.sensitivity_analysis() + dml_did_binary_obj.sensitivity_analysis() + + res_dict["sensitivity_params"] = dml_did_obj.sensitivity_params + res_dict["sensitivity_params_binary"] = dml_did_binary_obj.sensitivity_params + return res_dict @@ -206,3 +230,53 @@ def test_nuisance_loss(dml_did_cs_binary_vs_did_cs_fixture): ) for key, value in dml_did_cs_binary_vs_did_cs_fixture["nuisance_loss"].items(): assert np.allclose(value, dml_did_cs_binary_vs_did_cs_fixture["nuisance_loss_binary"][key], rtol=1e-9, atol=1e-3) + + +@pytest.mark.ci +def test_sensitivity_elements(dml_did_cs_binary_vs_did_cs_fixture): + sensitivity_element_names = ["sigma2", "nu2", "psi_sigma2", "psi_nu2"] + for sensitivity_element in sensitivity_element_names: + assert np.allclose( + dml_did_cs_binary_vs_did_cs_fixture["sensitivity_elements"][sensitivity_element], + dml_did_cs_binary_vs_did_cs_fixture["sensitivity_elements_manual"][sensitivity_element], + 
rtol=1e-9, + atol=1e-4, + ) + assert np.allclose( + dml_did_cs_binary_vs_did_cs_fixture["sensitivity_elements"][sensitivity_element], + dml_did_cs_binary_vs_did_cs_fixture["sensitivity_elements_binary"][sensitivity_element], + rtol=1e-9, + atol=1e-4, + ) + for sensitivity_element in ["riesz_rep"]: + assert np.allclose( + dml_did_cs_binary_vs_did_cs_fixture["sensitivity_elements"][sensitivity_element], + dml_did_cs_binary_vs_did_cs_fixture["sensitivity_elements_binary"][sensitivity_element], + rtol=1e-9, + atol=1e-4, + ) + + +@pytest.mark.ci +def test_sensitivity_params(dml_did_cs_binary_vs_did_cs_fixture): + for key in ["theta", "se", "ci"]: + assert np.allclose( + dml_did_cs_binary_vs_did_cs_fixture["sensitivity_params"][key]["lower"], + dml_did_cs_binary_vs_did_cs_fixture["sensitivity_params_binary"][key]["lower"], + rtol=1e-9, + atol=1e-4, + ) + assert np.allclose( + dml_did_cs_binary_vs_did_cs_fixture["sensitivity_params"][key]["upper"], + dml_did_cs_binary_vs_did_cs_fixture["sensitivity_params_binary"][key]["upper"], + rtol=1e-9, + atol=1e-4, + ) + + for key in ["rv", "rva"]: + assert np.allclose( + dml_did_cs_binary_vs_did_cs_fixture["sensitivity_params"][key], + dml_did_cs_binary_vs_did_cs_fixture["sensitivity_params_binary"][key], + rtol=1e-9, + atol=1e-4, + ) From 5d2232b455bede3866f0c2b7626f682f4cbad14d Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Fri, 6 Jun 2025 13:00:20 +0200 Subject: [PATCH 35/84] fix id positions and scaling for sensitivity --- doubleml/did/did_cs_binary.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/doubleml/did/did_cs_binary.py b/doubleml/did/did_cs_binary.py index 479cba93..6b2206a3 100644 --- a/doubleml/did/did_cs_binary.py +++ b/doubleml/did/did_cs_binary.py @@ -679,11 +679,19 @@ def _sensitivity_element_est(self, preds): nu2 = np.mean(nu2_score_element) psi_nu2 = nu2_score_element - nu2 + extend_kwargs = { + "n_obs": self._dml_data.n_obs, + "id_positions": self.id_positions, + "fill_value": 0.0, + } + + # add scaling to make variance estimation consistent (sample size difference) + scaling = self._dml_data.n_obs / self._n_obs_subset element_dict = { "sigma2": sigma2, "nu2": nu2, - "psi_sigma2": psi_sigma2, - "psi_nu2": psi_nu2, - "riesz_rep": rr, + "psi_sigma2": scaling * _set_id_positions(psi_sigma2, **extend_kwargs), + "psi_nu2": scaling * _set_id_positions(psi_nu2, **extend_kwargs), + "riesz_rep": scaling * _set_id_positions(rr, **extend_kwargs), } return element_dict From 7f01b6b5accc1293fba5435ae81129cca4b5f630 Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Fri, 6 Jun 2025 13:00:33 +0200 Subject: [PATCH 36/84] add placebo test for did_cs_binary --- .../did/tests/test_did_cs_binary_placebo.py | 58 +++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 doubleml/did/tests/test_did_cs_binary_placebo.py diff --git a/doubleml/did/tests/test_did_cs_binary_placebo.py b/doubleml/did/tests/test_did_cs_binary_placebo.py new file mode 100644 index 00000000..61def691 --- /dev/null +++ b/doubleml/did/tests/test_did_cs_binary_placebo.py @@ -0,0 +1,58 @@ +import numpy as np +import pytest +from lightgbm import LGBMClassifier, LGBMRegressor + +from doubleml.data import DoubleMLPanelData +from doubleml.did import DoubleMLDIDCSBinary +from doubleml.did.datasets import make_did_CS2021 + + +@pytest.fixture(scope="module", params=["observational", "experimental"]) +def did_score(request): + return request.param + + +@pytest.fixture(scope="module", params=[1, 3]) +def n_rep(request): + return request.param + 
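# Note on the fixture below: with n_pre_treat_periods=3, both t_value_pre (t_values[0])
# and t_value_eval (t_values[1]) fall before any unit is treated, so the evaluated
# contrast is a placebo with a true effect of zero; test_zero checks that the 99%
# confidence interval covers zero.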
+ +@pytest.fixture(scope="module") +def doubleml_did_fixture(did_score, n_rep): + n_obs = 500 + dgp = 5 # has to be experimental (for experimental score to be valid) + df = make_did_CS2021(n_obs=n_obs, dgp=dgp, n_pre_treat_periods=3) + dml_data = DoubleMLPanelData(df, y_col="y", d_cols="d", t_col="t", id_col="id", x_cols=["Z1", "Z2", "Z3", "Z4"]) + + kwargs = { + "obj_dml_data": dml_data, + "g_value": dml_data.g_values[0], + "t_value_pre": dml_data.t_values[0], + "t_value_eval": dml_data.t_values[1], + "ml_g": LGBMRegressor(verbose=-1), + "ml_m": LGBMClassifier(verbose=-1), + "score": did_score, + "n_rep": n_rep, + "n_folds": 5, + "draw_sample_splitting": True, + } + + dml_did = DoubleMLDIDCSBinary(**kwargs) + + np.random.seed(3141) + dml_did.fit() + ci = dml_did.confint(level=0.99) + + res_dict = { + "coef": dml_did.coef[0], + "ci_lower": ci.iloc[0, 0], + "ci_upper": ci.iloc[0, 1], + } + + return res_dict + + +@pytest.mark.ci +def test_zero(doubleml_did_fixture): + assert doubleml_did_fixture["ci_lower"] <= 0.0 + assert doubleml_did_fixture["ci_upper"] >= 0.0 From 3fafccc2cddb36397880ea9dfed993efe8c8d0ad Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Fri, 6 Jun 2025 13:08:45 +0200 Subject: [PATCH 37/84] extend ext prediction tests for did_cs_binary --- ...test_did_cs_binary_external_predictions.py | 81 ++++++++++++++++++- 1 file changed, 80 insertions(+), 1 deletion(-) diff --git a/doubleml/did/tests/test_did_cs_binary_external_predictions.py b/doubleml/did/tests/test_did_cs_binary_external_predictions.py index 4e09dfe0..477c6dc7 100644 --- a/doubleml/did/tests/test_did_cs_binary_external_predictions.py +++ b/doubleml/did/tests/test_did_cs_binary_external_predictions.py @@ -4,8 +4,9 @@ import pytest from sklearn.linear_model import LinearRegression, LogisticRegression +from doubleml.data import DoubleMLPanelData from doubleml.did import DoubleMLDIDCSBinary -from doubleml.did.datasets import make_did_SZ2020 +from doubleml.did.datasets import make_did_cs_CS2021, make_did_SZ2020 from doubleml.tests._utils import draw_smpls from doubleml.utils import DMLDummyClassifier, DMLDummyRegressor @@ -90,3 +91,81 @@ def test_score(doubleml_did_cs_fixture): def test_nuisance_loss(doubleml_did_cs_fixture): for key, value in doubleml_did_cs_fixture["dml_did_nuisance_loss"].items(): assert np.allclose(value, doubleml_did_cs_fixture["dml_did_ext_nuisance_loss"][key], rtol=1e-9, atol=1e-3) + + +@pytest.fixture(scope="module") +def doubleml_did_cs_panel_fixture(did_score, n_rep): + n_obs = 500 + n_folds = 5 + dgp = 1 + + ext_predictions = {"d": {}} + df = make_did_cs_CS2021(n_obs=n_obs, dgp_type=dgp, time_type="float") + dml_panel_data = DoubleMLPanelData(df, y_col="y", d_cols="d", id_col="id", t_col="t", x_cols=["Z1", "Z2", "Z3", "Z4"]) + + kwargs = { + "obj_dml_data": dml_panel_data, + "g_value": 2, + "t_value_pre": 0, + "t_value_eval": 1, + "score": did_score, + "n_rep": n_rep, + "draw_sample_splitting": False, + } + + dml_did = DoubleMLDIDCSBinary(ml_g=LinearRegression(), ml_m=LogisticRegression(), **kwargs) + all_smpls = draw_smpls(n_obs=dml_did.n_obs_subset, n_folds=n_folds, n_rep=n_rep, groups=dml_did._g_data) + dml_did.set_sample_splitting(all_smpls) + + np.random.seed(3141) + dml_did.fit(store_predictions=True) + + all_keys = ["ml_g_d0_t0", "ml_g_d0_t1", "ml_g_d1_t0", "ml_g_d1_t1"] + for key in all_keys: + ext_predictions["d"][key] = dml_did.predictions[key][:, :, 0] + if did_score == "observational": + ext_predictions["d"]["ml_m"] = dml_did.predictions["ml_m"][:, :, 0] + dml_did_ext = 
DoubleMLDIDCSBinary(ml_g=DMLDummyRegressor(), ml_m=DMLDummyClassifier(), **kwargs) + dml_did_ext.set_sample_splitting(all_smpls) + np.random.seed(3141) + dml_did_ext.fit(external_predictions=ext_predictions) + + res_dict = { + "coef": dml_did.coef[0], + "coef_ext": dml_did_ext.coef[0], + "se": dml_did.se[0], + "se_ext": dml_did_ext.se[0], + "score": dml_did.psi, + "score_ext": dml_did_ext.psi, + "dml_did_nuisance_loss": dml_did.nuisance_loss, + "dml_did_ext_nuisance_loss": dml_did_ext.nuisance_loss, + } + + return res_dict + + +@pytest.mark.ci +def test_panel_coef(doubleml_did_cs_panel_fixture): + assert math.isclose( + doubleml_did_cs_panel_fixture["coef"], doubleml_did_cs_panel_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-3 + ) + + +@pytest.mark.ci +def test_panel_se(doubleml_did_cs_panel_fixture): + assert math.isclose( + doubleml_did_cs_panel_fixture["se"], doubleml_did_cs_panel_fixture["se_ext"], rel_tol=1e-9, abs_tol=1e-3 + ) + + +@pytest.mark.ci +def test_panel_score(doubleml_did_cs_panel_fixture): + assert np.allclose( + doubleml_did_cs_panel_fixture["score"], doubleml_did_cs_panel_fixture["score_ext"], rtol=1e-9, atol=1e-3 + ) + + +@pytest.mark.ci +def test_panel_nuisance_loss(doubleml_did_cs_panel_fixture): + for key, value in doubleml_did_cs_panel_fixture["dml_did_nuisance_loss"].items(): + assert np.allclose(value, doubleml_did_cs_panel_fixture["dml_did_ext_nuisance_loss"][key], rtol=1e-9, atol=1e-3) From 9e378518109c15529782025e646825f071790d1c Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Fri, 6 Jun 2025 13:11:44 +0200 Subject: [PATCH 38/84] add control group test for did_cs_binary --- .../test_did_cs_binary_control_groups.py | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 doubleml/did/tests/test_did_cs_binary_control_groups.py diff --git a/doubleml/did/tests/test_did_cs_binary_control_groups.py b/doubleml/did/tests/test_did_cs_binary_control_groups.py new file mode 100644 index 00000000..ea4f2933 --- /dev/null +++ b/doubleml/did/tests/test_did_cs_binary_control_groups.py @@ -0,0 +1,31 @@ +from sklearn.linear_model import LinearRegression, LogisticRegression + +import doubleml as dml + +df = dml.did.datasets.make_did_cs_CS2021(n_obs=500, dgp_type=1, n_pre_treat_periods=2, n_periods=4, time_type="float") +dml_data = dml.data.DoubleMLPanelData(df, y_col="y", d_cols="d", id_col="id", t_col="t", x_cols=["Z1", "Z2", "Z3", "Z4"]) + +args = { + "obj_dml_data": dml_data, + "ml_g": LinearRegression(), + "ml_m": LogisticRegression(), + "g_value": 2, + "t_value_pre": 0, + "t_value_eval": 1, + "score": "observational", + "n_rep": 1, +} + + +def test_control_groups_different(): + dml_did_never_treated = dml.did.DoubleMLDIDCSBinary(control_group="never_treated", **args) + dml_did_not_yet_treated = dml.did.DoubleMLDIDCSBinary(control_group="not_yet_treated", **args) + + assert dml_did_never_treated.n_obs_subset != dml_did_not_yet_treated.n_obs_subset + # same treatment group + assert dml_did_never_treated.data_subset["G_indicator"].sum() == dml_did_not_yet_treated.data_subset["G_indicator"].sum() + + dml_did_never_treated.fit() + dml_did_not_yet_treated.fit() + + assert dml_did_never_treated.coef != dml_did_not_yet_treated.coef From 810eade37a837a3f8c3f0dbae8728c51fc8fea79 Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Fri, 6 Jun 2025 13:40:50 +0200 Subject: [PATCH 39/84] add tune to did_cs_binary --- doubleml/did/did_cs_binary.py | 115 ++++++++- doubleml/did/tests/test_did_cs_binary_tune.py | 221 ++++++++++++++++++ 2 files changed, 334 insertions(+), 2 
deletions(-) create mode 100644 doubleml/did/tests/test_did_cs_binary_tune.py diff --git a/doubleml/did/did_cs_binary.py b/doubleml/did/did_cs_binary.py index 6b2206a3..161a31c3 100644 --- a/doubleml/did/did_cs_binary.py +++ b/doubleml/did/did_cs_binary.py @@ -23,7 +23,7 @@ _check_score, _check_trimming, ) -from doubleml.utils._estimation import _dml_cv_predict, _get_cond_smpls_2d +from doubleml.utils._estimation import _dml_cv_predict, _dml_tune, _get_cond_smpls_2d from doubleml.utils._propensity_score import _trimm @@ -586,7 +586,118 @@ def _score_elements(self, y, d, t, g_hat_d0_t0, g_hat_d0_t1, g_hat_d1_t0, g_hat_ def _nuisance_tuning( self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search ): - pass + x, y = check_X_y(X=self._x_data, y=self._y_data, force_all_finite=False) + _, d = check_X_y(x, self._g_data, force_all_finite=False) # (d is the G_indicator) + _, t = check_X_y(x, self._t_data, force_all_finite=False) + + if scoring_methods is None: + scoring_methods = {"ml_g": None, "ml_m": None} + + # nuisance training sets conditional on d and t + smpls_d0_t0, smpls_d0_t1, smpls_d1_t0, smpls_d1_t1 = _get_cond_smpls_2d(smpls, d, t) + train_inds = [train_index for (train_index, _) in smpls] + train_inds_d0_t0 = [train_index for (train_index, _) in smpls_d0_t0] + train_inds_d0_t1 = [train_index for (train_index, _) in smpls_d0_t1] + train_inds_d1_t0 = [train_index for (train_index, _) in smpls_d1_t0] + train_inds_d1_t1 = [train_index for (train_index, _) in smpls_d1_t1] + + tune_args = { + "n_folds_tune": n_folds_tune, + "n_jobs_cv": n_jobs_cv, + "search_mode": search_mode, + "n_iter_randomized_search": n_iter_randomized_search, + } + + g_d0_t0_tune_res = _dml_tune( + y, + x, + train_inds_d0_t0, + self._learner["ml_g"], + param_grids["ml_g"], + scoring_methods["ml_g"], + **tune_args, + ) + + g_d0_t1_tune_res = _dml_tune( + y, + x, + train_inds_d0_t1, + self._learner["ml_g"], + param_grids["ml_g"], + scoring_methods["ml_g"], + **tune_args, + ) + + g_d1_t0_tune_res = _dml_tune( + y, + x, + train_inds_d1_t0, + self._learner["ml_g"], + param_grids["ml_g"], + scoring_methods["ml_g"], + **tune_args, + ) + + g_d1_t1_tune_res = _dml_tune( + y, + x, + train_inds_d1_t1, + self._learner["ml_g"], + param_grids["ml_g"], + scoring_methods["ml_g"], + **tune_args, + ) + + m_tune_res = list() + if self.score == "observational": + m_tune_res = _dml_tune( + d, + x, + train_inds, + self._learner["ml_m"], + param_grids["ml_m"], + scoring_methods["ml_m"], + **tune_args, + ) + + g_d0_t0_best_params = [xx.best_params_ for xx in g_d0_t0_tune_res] + g_d0_t1_best_params = [xx.best_params_ for xx in g_d0_t1_tune_res] + g_d1_t0_best_params = [xx.best_params_ for xx in g_d1_t0_tune_res] + g_d1_t1_best_params = [xx.best_params_ for xx in g_d1_t1_tune_res] + + if self.score == "observational": + m_best_params = [xx.best_params_ for xx in m_tune_res] + params = { + "ml_g_d0_t0": g_d0_t0_best_params, + "ml_g_d0_t1": g_d0_t1_best_params, + "ml_g_d1_t0": g_d1_t0_best_params, + "ml_g_d1_t1": g_d1_t1_best_params, + "ml_m": m_best_params, + } + tune_res = { + "g_d0_t0_tune": g_d0_t0_tune_res, + "g_d0_t1_tune": g_d0_t1_tune_res, + "g_d1_t0_tune": g_d1_t0_tune_res, + "g_d1_t1_tune": g_d1_t1_tune_res, + "m_tune": m_tune_res, + } + else: + params = { + "ml_g_d0_t0": g_d0_t0_best_params, + "ml_g_d0_t1": g_d0_t1_best_params, + "ml_g_d1_t0": g_d1_t0_best_params, + "ml_g_d1_t1": g_d1_t1_best_params, + } + tune_res = { + "g_d0_t0_tune": g_d0_t0_tune_res, + "g_d0_t1_tune": 
g_d0_t1_tune_res, + "g_d1_t0_tune": g_d1_t0_tune_res, + "g_d1_t1_tune": g_d1_t1_tune_res, + } + + res = {"params": params, "tune_res": tune_res} + + return res def _sensitivity_element_est(self, preds): y = self._y_data diff --git a/doubleml/did/tests/test_did_cs_binary_tune.py b/doubleml/did/tests/test_did_cs_binary_tune.py new file mode 100644 index 00000000..0bd2c6ab --- /dev/null +++ b/doubleml/did/tests/test_did_cs_binary_tune.py @@ -0,0 +1,221 @@ +import math + +import numpy as np +import pytest +from sklearn.base import clone +from sklearn.ensemble import RandomForestRegressor +from sklearn.linear_model import LogisticRegression + +import doubleml as dml + +from ...tests._utils import draw_smpls +from ._utils_did_cs_manual import fit_did_cs, tune_nuisance_did_cs +from ._utils_did_manual import boot_did + + +@pytest.fixture(scope="module", params=[RandomForestRegressor(random_state=42)]) +def learner_g(request): + return request.param + + +@pytest.fixture(scope="module", params=[LogisticRegression()]) +def learner_m(request): + return request.param + + +@pytest.fixture(scope="module", params=["observational", "experimental"]) +def score(request): + return request.param + + +@pytest.fixture(scope="module", params=[True, False]) +def in_sample_normalization(request): + return request.param + + +@pytest.fixture(scope="module", params=[True, False]) +def tune_on_folds(request): + return request.param + + +def get_par_grid(learner): + if learner.__class__ in [RandomForestRegressor]: + par_grid = {"n_estimators": [5, 10, 20]} + else: + assert learner.__class__ in [LogisticRegression] + par_grid = {"C": np.logspace(-4, 2, 10)} + return par_grid + + +@pytest.fixture(scope="module") +def dml_did_fixture(generate_data_did_binary, learner_g, learner_m, score, in_sample_normalization, tune_on_folds): + par_grid = {"ml_g": get_par_grid(learner_g), "ml_m": get_par_grid(learner_m)} + n_folds_tune = 4 + + boot_methods = ["normal"] + n_folds = 2 + n_rep_boot = 499 + + # collect data + dml_panel_data = generate_data_did_binary + df = dml_panel_data._data.sort_values(by=["id", "t"]) + # Reorder data before to make both approaches compatible + dml_panel_data = dml.data.DoubleMLPanelData( + df, y_col="y", d_cols="d", id_col="id", t_col="t", x_cols=["Z1", "Z2", "Z3", "Z4"] + ) + obj_dml_data = dml.DoubleMLData(df, y_col="y", d_cols="d", t_col="t", x_cols=["Z1", "Z2", "Z3", "Z4"]) + + n_obs = df.shape[0] + strata = df["d"] + 2 * df["t"] # only valid since it values are binary + all_smpls = draw_smpls(n_obs, n_folds, n_rep=1, groups=strata) + + # Set machine learning methods for m & g + ml_g = clone(learner_g) + ml_m = clone(learner_m) + + dml_args = { + "ml_g": ml_g, + "ml_m": ml_m, + "n_folds": n_folds, + "score": score, + "in_sample_normalization": in_sample_normalization, + "draw_sample_splitting": False, + } + + dml_did_binary_obj = dml.did.DoubleMLDIDCSBinary( + dml_panel_data, + g_value=1, + t_value_pre=0, + t_value_eval=1, + **dml_args, + ) + + dml_did_obj = dml.DoubleMLDIDCS( + obj_dml_data, + **dml_args, + ) + + # synchronize the sample splitting + dml_did_obj.set_sample_splitting(all_smpls=all_smpls) + dml_did_binary_obj.set_sample_splitting(all_smpls=all_smpls) + + # tune hyperparameters + np.random.seed(3141) + tune_res = dml_did_obj.tune(par_grid, tune_on_folds=tune_on_folds, n_folds_tune=n_folds_tune, return_tune_res=False) + assert isinstance(tune_res, dml.DoubleMLDIDCS) + np.random.seed(3141) + tune_res_binary = dml_did_binary_obj.tune( + par_grid, tune_on_folds=tune_on_folds, 
n_folds_tune=n_folds_tune, return_tune_res=False
+    )
+    assert isinstance(tune_res_binary, dml.did.DoubleMLDIDCSBinary)
+
+    dml_did_obj.fit()
+    dml_did_binary_obj.fit()
+
+    # manual fit
+    y = df["y"].values
+    d = df["d"].values
+    x = df[["Z1", "Z2", "Z3", "Z4"]].values
+    t = df["t"].values
+    np.random.seed(3141)
+    smpls = all_smpls[0]
+
+    if tune_on_folds:
+        g_d0_t0_params, g_d0_t1_params, g_d1_t0_params, g_d1_t1_params, m_params = tune_nuisance_did_cs(
+            y, x, d, t, clone(learner_g), clone(learner_m), smpls, score, n_folds_tune, par_grid["ml_g"], par_grid["ml_m"]
+        )
+    else:
+        xx = [(np.arange(len(y)), np.array([]))]
+        g_d0_t0_params, g_d0_t1_params, g_d1_t0_params, g_d1_t1_params, m_params = tune_nuisance_did_cs(
+            y, x, d, t, clone(learner_g), clone(learner_m), xx, score, n_folds_tune, par_grid["ml_g"], par_grid["ml_m"]
+        )
+        g_d0_t0_params = g_d0_t0_params * n_folds
+        g_d0_t1_params = g_d0_t1_params * n_folds
+        g_d1_t0_params = g_d1_t0_params * n_folds
+        g_d1_t1_params = g_d1_t1_params * n_folds
+        if score == "observational":
+            m_params = m_params * n_folds
+        else:
+            assert score == "experimental"
+            m_params = None
+
+    res_manual = fit_did_cs(
+        y,
+        x,
+        d,
+        t,
+        clone(learner_g),
+        clone(learner_m),
+        all_smpls,
+        score,
+        in_sample_normalization,
+        g_d0_t0_params=g_d0_t0_params,
+        g_d0_t1_params=g_d0_t1_params,
+        g_d1_t0_params=g_d1_t0_params,
+        g_d1_t1_params=g_d1_t1_params,
+        m_params=m_params,
+    )
+
+    res_dict = {
+        "coef": dml_did_obj.coef,
+        "coef_binary": dml_did_binary_obj.coef,
+        "coef_manual": res_manual["theta"],
+        "se": dml_did_obj.se,
+        "se_binary": dml_did_binary_obj.se,
+        "se_manual": res_manual["se"],
+        "boot_methods": boot_methods,
+    }
+
+    for bootstrap in boot_methods:
+        np.random.seed(3141)
+        boot_t_stat = boot_did(
+            y,
+            res_manual["thetas"],
+            res_manual["ses"],
+            res_manual["all_psi_a"],
+            res_manual["all_psi_b"],
+            all_smpls,
+            bootstrap,
+            n_rep_boot,
+        )
+
+        np.random.seed(3141)
+        dml_did_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot)
+        np.random.seed(3141)
+        dml_did_binary_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot)
+
+        res_dict["boot_t_stat" + bootstrap] = dml_did_obj.boot_t_stat
+        res_dict["boot_t_stat" + bootstrap + "_binary"] = dml_did_binary_obj.boot_t_stat
+        res_dict["boot_t_stat" + bootstrap + "_manual"] = boot_t_stat.reshape(-1, 1, 1)
+
+    return res_dict
+
+
+@pytest.mark.ci
+def test_dml_did_coef(dml_did_fixture):
+    assert math.isclose(dml_did_fixture["coef"][0], dml_did_fixture["coef_manual"], rel_tol=1e-9, abs_tol=1e-4)
+    assert math.isclose(dml_did_fixture["coef_binary"][0], dml_did_fixture["coef"][0], rel_tol=1e-9, abs_tol=1e-4)
+
+
+@pytest.mark.ci
+def test_dml_did_se(dml_did_fixture):
+    assert math.isclose(dml_did_fixture["se"][0], dml_did_fixture["se_manual"], rel_tol=1e-9, abs_tol=1e-4)
+    assert math.isclose(dml_did_fixture["se_binary"][0], dml_did_fixture["se"][0], rel_tol=1e-9, abs_tol=1e-4)
+
+
+@pytest.mark.ci
+def test_boot(dml_did_fixture):
+    for bootstrap in dml_did_fixture["boot_methods"]:
+        assert np.allclose(
+            dml_did_fixture["boot_t_stat" + bootstrap],
+            dml_did_fixture["boot_t_stat" + bootstrap + "_manual"],
+            rtol=1e-9,
+            atol=1e-4,
+        )
+
+        assert np.allclose(
+            dml_did_fixture["boot_t_stat" + bootstrap],
+            dml_did_fixture["boot_t_stat" + bootstrap + "_binary"],
+            rtol=1e-9,
+            atol=1e-4,
+        )

From 6b6116cb608414a3c9313447d89c51a0c04c3651 Mon Sep 17 00:00:00 2001
From: SvenKlaassen
Date: Fri, 6 Jun 2025 13:53:03 +0200
Subject: [PATCH 40/84] update did_cs_binary stdout test

---
.../did/tests/test_did_cs_binary_stdout.py | 49 +++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 doubleml/did/tests/test_did_cs_binary_stdout.py diff --git a/doubleml/did/tests/test_did_cs_binary_stdout.py b/doubleml/did/tests/test_did_cs_binary_stdout.py new file mode 100644 index 00000000..16135636 --- /dev/null +++ b/doubleml/did/tests/test_did_cs_binary_stdout.py @@ -0,0 +1,49 @@ +import io +from contextlib import redirect_stdout + +import pytest +from sklearn.linear_model import LinearRegression, LogisticRegression + +import doubleml as dml + +dml_data = dml.did.datasets.make_did_SZ2020(n_obs=500, dgp_type=1, return_type="DoubleMLPanelData") + + +@pytest.mark.ci +def test_print_periods(): + """Test that print_periods parameter correctly controls output printing.""" + + # Create test data + dml_data = dml.did.datasets.make_did_SZ2020(n_obs=100, return_type="DoubleMLPanelData") + + # Test 1: Default case (print_periods=False) - should not print anything + f = io.StringIO() + with redirect_stdout(f): + _ = dml.did.DoubleMLDIDCSBinary( + obj_dml_data=dml_data, + ml_g=LinearRegression(), + ml_m=LogisticRegression(), + g_value=1, + t_value_pre=0, + t_value_eval=1, + print_periods=False, # Default + ) + output_default = f.getvalue() + assert output_default.strip() == "", "Expected no output with print_periods=False" + + # Test 2: With print_periods=True - should print information + f = io.StringIO() + with redirect_stdout(f): + _ = dml.did.DoubleMLDIDCSBinary( + obj_dml_data=dml_data, + ml_g=LinearRegression(), + ml_m=LogisticRegression(), + g_value=1, + t_value_pre=0, + t_value_eval=1, + print_periods=True, + ) + output_print = f.getvalue() + assert "Evaluation of ATT(1, 1), with pre-treatment period 0" in output_print + assert "post-treatment: True" in output_print + assert "Control group: never_treated" in output_print From de324cfe102bc466e571f298e0be2f499911b0f0 Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Fri, 6 Jun 2025 13:59:54 +0200 Subject: [PATCH 41/84] add exceptions and tests --- doubleml/did/did_cs_binary.py | 28 ++++ .../tests/test_did_cs_binary_exceptions.py | 152 ++++++++++++++++++ 2 files changed, 180 insertions(+) create mode 100644 doubleml/did/tests/test_did_cs_binary_exceptions.py diff --git a/doubleml/did/did_cs_binary.py b/doubleml/did/did_cs_binary.py index 161a31c3..a34dbf2a 100644 --- a/doubleml/did/did_cs_binary.py +++ b/doubleml/did/did_cs_binary.py @@ -806,3 +806,31 @@ def _sensitivity_element_est(self, preds): "riesz_rep": scaling * _set_id_positions(rr, **extend_kwargs), } return element_dict + + def sensitivity_benchmark(self, benchmarking_set, fit_args=None): + """ + Computes a benchmark for a given set of features. + Returns a DataFrame containing the corresponding values for cf_y, cf_d, rho and the change in estimates. + + Parameters + ---------- + benchmarking_set : list + List of features to be used for benchmarking. + + fit_args : dict, optional + Additional arguments for the fit method. + Default is None. + + Returns + ------- + benchmark_results : pandas.DataFrame + Benchmark results. + """ + if self.score == "experimental": + warnings.warn( + "Sensitivity benchmarking for experimental score may not be meaningful. 
" + "Consider using score='observational' for conditional treatment assignment.", + UserWarning, + ) + + return super().sensitivity_benchmark(benchmarking_set, fit_args) diff --git a/doubleml/did/tests/test_did_cs_binary_exceptions.py b/doubleml/did/tests/test_did_cs_binary_exceptions.py new file mode 100644 index 00000000..b506da2d --- /dev/null +++ b/doubleml/did/tests/test_did_cs_binary_exceptions.py @@ -0,0 +1,152 @@ +from unittest.mock import patch + +import numpy as np +import pandas as pd +import pytest +from sklearn.linear_model import LinearRegression, LogisticRegression + +import doubleml as dml + +dml_data = dml.did.datasets.make_did_SZ2020(n_obs=500, dgp_type=1, return_type="DoubleMLPanelData") + +valid_arguments = { + "obj_dml_data": dml_data, + "ml_g": LinearRegression(), + "ml_m": LogisticRegression(), + "g_value": 1, + "t_value_pre": 0, + "t_value_eval": 1, + "score": "observational", + "n_rep": 1, + "draw_sample_splitting": True, +} + + +@pytest.mark.ci +def test_input(): + # control group + msg = r"The control group has to be one of \['never_treated', 'not_yet_treated'\]. 0 was passed." + with pytest.raises(ValueError, match=msg): + invalid_arguments = {"control_group": 0} + _ = dml.did.DoubleMLDIDCSBinary(**(valid_arguments | invalid_arguments)) + + # g value + msg = r"The value test is not in the set of treatment group values \[0 1\]." + with pytest.raises(ValueError, match=msg): + invalid_arguments = {"g_value": "test"} + _ = dml.did.DoubleMLDIDCSBinary(**(valid_arguments | invalid_arguments)) + + msg = r"The never treated group is not allowed as treatment group \(g_value=0\)." + with pytest.raises(ValueError, match=msg): + invalid_arguments = {"g_value": 0} + _ = dml.did.DoubleMLDIDCSBinary(**(valid_arguments | invalid_arguments)) + + msg = r"The never treated group is not allowed as treatment group \(g_value=0\)." + with pytest.raises(ValueError, match=msg): + invalid_arguments = {"g_value": 0.0} + _ = dml.did.DoubleMLDIDCSBinary(**(valid_arguments | invalid_arguments)) + + # t values + msg = r"The value test is not in the set of evaluation period values \[0 1\]." + with pytest.raises(ValueError, match=msg): + invalid_arguments = {"t_value_pre": "test"} + _ = dml.did.DoubleMLDIDCSBinary(**(valid_arguments | invalid_arguments)) + with pytest.raises(ValueError, match=msg): + invalid_arguments = {"t_value_eval": "test"} + _ = dml.did.DoubleMLDIDCSBinary(**(valid_arguments | invalid_arguments)) + + # in-sample normalization + msg = "in_sample_normalization indicator has to be boolean. Object of type passed." + with pytest.raises(TypeError, match=msg): + invalid_arguments = {"in_sample_normalization": "test"} + _ = dml.did.DoubleMLDIDCSBinary(**(valid_arguments | invalid_arguments)) + + # ml_g classifier + msg = r"The ml_g learner LogisticRegression\(\) was identified as" + with pytest.raises(ValueError, match=msg): + invalid_arguments = {"ml_g": LogisticRegression()} + _ = dml.did.DoubleMLDIDCSBinary(**(valid_arguments | invalid_arguments)) + + +@pytest.mark.ci +def test_no_control_group_exception(): + msg = "No observations in the control group." 
+ with pytest.raises(ValueError, match=msg): + invalid_data = dml.did.datasets.make_did_SZ2020(n_obs=500, dgp_type=1, return_type="DoubleMLPanelData") + invalid_data.data["d"] = 1.0 + invalid_arguments = {"obj_dml_data": invalid_data, "control_group": "not_yet_treated"} + _ = dml.did.DoubleMLDIDCSBinary(**(valid_arguments | invalid_arguments)) + + +@pytest.mark.ci +def test_check_data_exceptions(): + """Test exception handling for _check_data method in DoubleMLDIDCSBinary""" + df = pd.DataFrame(np.random.normal(size=(10, 5)), columns=[f"Col_{i}" for i in range(5)]) + + # Test 1: Data has to be DoubleMLPanelData + invalid_data_types = [ + dml.data.DoubleMLData(df, y_col="Col_0", d_cols="Col_1"), + ] + + for invalid_data in invalid_data_types: + msg = r"For repeated outcomes the data must be of DoubleMLPanelData type\." + with pytest.raises(TypeError, match=msg): + _ = dml.did.DoubleMLDIDCSBinary( + obj_dml_data=invalid_data, + ml_g=LinearRegression(), + ml_m=LogisticRegression(), + g_value=1, + t_value_pre=0, + t_value_eval=1, + ) + + # Test 2: Data cannot have instrumental variables + df_with_z = dml_data.data.copy() + dml_data_with_z = dml.data.DoubleMLPanelData( + df_with_z, y_col="y", d_cols="d", id_col="id", t_col="t", z_cols=["Z1"], x_cols=["Z2", "Z3", "Z4"] + ) + + msg = r"Incompatible data. Z1 have been set as instrumental variable\(s\)." + with pytest.raises(NotImplementedError, match=msg): + _ = dml.did.DoubleMLDIDCSBinary( + obj_dml_data=dml_data_with_z, + ml_g=LinearRegression(), + ml_m=LogisticRegression(), + g_value=1, + t_value_pre=0, + t_value_eval=1, + ) + + # Test 3: Data must have exactly one treatment variable (using mock) + with patch.object(dml_data.__class__, "n_treat", property(lambda self: 2)): + msg = ( + "Incompatible data. To fit an DID model with DML exactly one variable needs to be specified as treatment variable." 
+ ) + with pytest.raises(ValueError, match=msg): + _ = dml.did.DoubleMLDIDCSBinary( + obj_dml_data=dml_data, + ml_g=LinearRegression(), + ml_m=LogisticRegression(), + g_value=1, + t_value_pre=0, + t_value_eval=1, + ) + + +@pytest.mark.ci +def test_benchmark_warning(): + """Test warning when sensitivity_benchmark is called with experimental score""" + args = { + "obj_dml_data": dml_data, + "ml_g": LinearRegression(), + "ml_m": LogisticRegression(), + "g_value": 1, + "t_value_pre": 0, + "t_value_eval": 1, + "n_rep": 1, + } + # Create a DID model with experimental score + did_model = dml.did.DoubleMLDIDCSBinary(**args, score="experimental") + did_model.fit() + with pytest.warns(UserWarning, match="Sensitivity benchmarking for experimental score may not be meaningful"): + did_model.sensitivity_benchmark(["Z1", "Z2"]) From 8d0c52c54405a089e9171a5380185fc7ebff272a Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Fri, 6 Jun 2025 15:02:54 +0200 Subject: [PATCH 42/84] simplify did_cs_binary nuisance estimation --- doubleml/did/did_cs_binary.py | 131 +++++++++++----------------------- 1 file changed, 40 insertions(+), 91 deletions(-) diff --git a/doubleml/did/did_cs_binary.py b/doubleml/did/did_cs_binary.py index a34dbf2a..fafcecf4 100644 --- a/doubleml/did/did_cs_binary.py +++ b/doubleml/did/did_cs_binary.py @@ -318,6 +318,34 @@ def _preprocess_data(self, g_value, pre_t, eval_t): data_subset = data_subset.assign(t_indicator=data_subset[t_col] == eval_t) return data_subset + def _estimate_conditional_g( + self, x, y, d_val, t_val, d_arr, t_arr, smpls_cond, external_prediction, learner_param_key, n_jobs_cv, return_models + ): + """Helper function to estimate conditional g_hat for fixed d and t.""" + g_hat_cond = {} + condition = (d_arr == d_val) & (t_arr == t_val) + + if external_prediction is not None: + ml_g_targets = np.full_like(y, np.nan, dtype="float64") + ml_g_targets[condition] = y[condition] + ml_pred = _get_id_positions(external_prediction, self.id_positions) + g_hat_cond = {"preds": ml_pred, "targets": ml_g_targets, "models": None} + else: + g_hat_cond = _dml_cv_predict( + self._learner["ml_g"], + x, + y, + smpls_cond, + n_jobs=n_jobs_cv, + est_params=self._get_params(learner_param_key), + method=self._predict_method["ml_g"], + return_models=return_models, + ) + _check_finite_predictions(g_hat_cond["preds"], self._learner["ml_g"], "ml_g", smpls_cond) + g_hat_cond["targets"] = g_hat_cond["targets"].astype(float) + g_hat_cond["targets"][~condition] = np.nan + return g_hat_cond + def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False): # Here: d is a binary treatment indicator @@ -333,97 +361,18 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa # nuisance g smpls_d0_t0, smpls_d0_t1, smpls_d1_t0, smpls_d1_t1 = _get_cond_smpls_2d(smpls, d, t) - # nuisance g for d==0 & t==0 - if external_predictions["ml_g_d0_t0"] is not None: - ml_g_d0_t0_targets = np.full_like(y, np.nan, dtype="float64") - ml_g_d0_t0_targets[((d == 0) & (t == 0))] = y[((d == 0) & (t == 0))] - ml_d0_t0_pred = _get_id_positions(external_predictions["ml_g_d0_t0"], self.id_positions) - g_hat_d0_t0 = {"preds": ml_d0_t0_pred, "targets": ml_g_d0_t0_targets, "models": None} - else: - g_hat_d0_t0 = _dml_cv_predict( - self._learner["ml_g"], - x, - y, - smpls_d0_t0, - n_jobs=n_jobs_cv, - est_params=self._get_params("ml_g_d0_t0"), - method=self._predict_method["ml_g"], - return_models=return_models, - ) - - _check_finite_predictions(g_hat_d0_t0["preds"], self._learner["ml_g"], 
"ml_g", smpls) - # adjust target values to consider only compatible subsamples - g_hat_d0_t0["targets"] = g_hat_d0_t0["targets"].astype(float) - g_hat_d0_t0["targets"][np.invert((d == 0) & (t == 0))] = np.nan - - # nuisance g for d==0 & t==1 - if external_predictions["ml_g_d0_t1"] is not None: - ml_g_d0_t1_targets = np.full_like(y, np.nan, dtype="float64") - ml_g_d0_t1_targets[((d == 0) & (t == 1))] = y[((d == 0) & (t == 1))] - ml_d0_t1_pred = _get_id_positions(external_predictions["ml_g_d0_t1"], self.id_positions) - g_hat_d0_t1 = {"preds": ml_d0_t1_pred, "targets": ml_g_d0_t1_targets, "models": None} - else: - g_hat_d0_t1 = _dml_cv_predict( - self._learner["ml_g"], - x, - y, - smpls_d0_t1, - n_jobs=n_jobs_cv, - est_params=self._get_params("ml_g_d0_t1"), - method=self._predict_method["ml_g"], - return_models=return_models, - ) - - _check_finite_predictions(g_hat_d0_t1["preds"], self._learner["ml_g"], "ml_g", smpls) - # adjust target values to consider only compatible subsamples - g_hat_d0_t1["targets"] = g_hat_d0_t1["targets"].astype(float) - g_hat_d0_t1["targets"][np.invert((d == 0) & (t == 1))] = np.nan - - # nuisance g for d==1 & t==0 - if external_predictions["ml_g_d1_t0"] is not None: - ml_g_d1_t0_targets = np.full_like(y, np.nan, dtype="float64") - ml_g_d1_t0_targets[((d == 1) & (t == 0))] = y[((d == 1) & (t == 0))] - ml_d1_t0_pred = _get_id_positions(external_predictions["ml_g_d1_t0"], self.id_positions) - g_hat_d1_t0 = {"preds": ml_d1_t0_pred, "targets": ml_g_d1_t0_targets, "models": None} - else: - g_hat_d1_t0 = _dml_cv_predict( - self._learner["ml_g"], - x, - y, - smpls_d1_t0, - n_jobs=n_jobs_cv, - est_params=self._get_params("ml_g_d1_t0"), - method=self._predict_method["ml_g"], - return_models=return_models, - ) - - _check_finite_predictions(g_hat_d1_t0["preds"], self._learner["ml_g"], "ml_g", smpls) - # adjust target values to consider only compatible subsamples - g_hat_d1_t0["targets"] = g_hat_d1_t0["targets"].astype(float) - g_hat_d1_t0["targets"][np.invert((d == 1) & (t == 0))] = np.nan - - # nuisance g for d==1 & t==1 - if external_predictions["ml_g_d1_t1"] is not None: - ml_g_d1_t1_targets = np.full_like(y, np.nan, dtype="float64") - ml_g_d1_t1_targets[((d == 1) & (t == 1))] = y[((d == 1) & (t == 1))] - ml_d1_t1_pred = _get_id_positions(external_predictions["ml_g_d1_t1"], self.id_positions) - g_hat_d1_t1 = {"preds": ml_d1_t1_pred, "targets": ml_g_d1_t1_targets, "models": None} - else: - g_hat_d1_t1 = _dml_cv_predict( - self._learner["ml_g"], - x, - y, - smpls_d1_t1, - n_jobs=n_jobs_cv, - est_params=self._get_params("ml_g_d1_t1"), - method=self._predict_method["ml_g"], - return_models=return_models, - ) - - _check_finite_predictions(g_hat_d1_t1["preds"], self._learner["ml_g"], "ml_g", smpls) - # adjust target values to consider only compatible subsamples - g_hat_d1_t1["targets"] = g_hat_d1_t1["targets"].astype(float) - g_hat_d1_t1["targets"][np.invert((d == 1) & (t == 1))] = np.nan + g_hat_d0_t0 = self._estimate_conditional_g( + x, y, 0, 0, d, t, smpls_d0_t0, external_predictions["ml_g_d0_t0"], "ml_g_d0_t0", n_jobs_cv, return_models + ) + g_hat_d0_t1 = self._estimate_conditional_g( + x, y, 0, 1, d, t, smpls_d0_t1, external_predictions["ml_g_d0_t1"], "ml_g_d0_t1", n_jobs_cv, return_models + ) + g_hat_d1_t0 = self._estimate_conditional_g( + x, y, 1, 0, d, t, smpls_d1_t0, external_predictions["ml_g_d1_t0"], "ml_g_d1_t0", n_jobs_cv, return_models + ) + g_hat_d1_t1 = self._estimate_conditional_g( + x, y, 1, 1, d, t, smpls_d1_t1, external_predictions["ml_g_d1_t1"], "ml_g_d1_t1", 
n_jobs_cv, return_models + ) # only relevant for observational setting m_hat = {"preds": None, "targets": None, "models": None} From af45f7fb8b7dee34a480b7843054163a37d47efc Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Wed, 11 Jun 2025 08:34:48 +0200 Subject: [PATCH 43/84] add __str__ method to did_cs_binary --- doubleml/did/did_cs_binary.py | 53 +++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/doubleml/did/did_cs_binary.py b/doubleml/did/did_cs_binary.py index fafcecf4..7788f4b3 100644 --- a/doubleml/did/did_cs_binary.py +++ b/doubleml/did/did_cs_binary.py @@ -156,6 +156,59 @@ def __init__( self._sensitivity_implemented = True self._external_predictions_implemented = True + def __str__(self): + class_name = self.__class__.__name__ + header = f"================== {class_name} Object ==================\n" + data_summary = self._dml_data._data_summary_str() + score_info = ( + f"Score function: {str(self.score)}\n" + f"Treatment group: {str(self.g_value)}\n" + f"Pre-treatment period: {str(self.t_value_pre)}\n" + f"Evaluation period: {str(self.t_value_eval)}\n" + f"Control group: {str(self.control_group)}\n" + f"Anticipation periods: {str(self.anticipation_periods)}\n" + f"Effective sample size: {str(self.n_obs_subset)}\n" + ) + learner_info = "" + for key, value in self.learner.items(): + learner_info += f"Learner {key}: {str(value)}\n" + if self.nuisance_loss is not None: + learner_info += "Out-of-sample Performance:\n" + is_classifier = [value for value in self._is_classifier.values()] + is_regressor = [not value for value in is_classifier] + if any(is_regressor): + learner_info += "Regression:\n" + for learner in [key for key, value in self._is_classifier.items() if value is False]: + learner_info += f"Learner {learner} RMSE: {self.nuisance_loss[learner]}\n" + if any(is_classifier): + learner_info += "Classification:\n" + for learner in [key for key, value in self._is_classifier.items() if value is True]: + learner_info += f"Learner {learner} Log Loss: {self.nuisance_loss[learner]}\n" + + if self._is_cluster_data: + resampling_info = ( + f"No. folds per cluster: {self._n_folds_per_cluster}\n" + f"No. folds: {self.n_folds}\n" + f"No. repeated sample splits: {self.n_rep}\n" + ) + else: + resampling_info = f"No. folds: {self.n_folds}\nNo. 
repeated sample splits: {self.n_rep}\n" + fit_summary = str(self.summary) + res = ( + header + + "\n------------------ Data summary ------------------\n" + + data_summary + + "\n------------------ Score & algorithm ------------------\n" + + score_info + + "\n------------------ Machine learner ------------------\n" + + learner_info + + "\n------------------ Resampling ------------------\n" + + resampling_info + + "\n------------------ Fit summary ------------------\n" + + fit_summary + ) + return res + @property def g_value(self): """ From 698f161945dadb75a4d2637e311c811c9542338a Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Wed, 11 Jun 2025 09:31:54 +0200 Subject: [PATCH 44/84] add test on panel data to did_cs binary --- doubleml/did/did_cs.py | 18 +- .../test_did_cs_binary_vs_did_cs_panel.py | 202 ++++++++++++++++++ 2 files changed, 215 insertions(+), 5 deletions(-) create mode 100644 doubleml/did/tests/test_did_cs_binary_vs_did_cs_panel.py diff --git a/doubleml/did/did_cs.py b/doubleml/did/did_cs.py index 5984399c..8136f60c 100644 --- a/doubleml/did/did_cs.py +++ b/doubleml/did/did_cs.py @@ -227,7 +227,9 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa # nuisance g smpls_d0_t0, smpls_d0_t1, smpls_d1_t0, smpls_d1_t1 = _get_cond_smpls_2d(smpls, d, t) if external_predictions["ml_g_d0_t0"] is not None: - g_hat_d0_t0 = {"preds": external_predictions["ml_g_d0_t0"], "targets": None, "models": None} + g_hat_d0_t0_targets = np.full_like(y, np.nan, dtype="float64") + g_hat_d0_t0_targets[(d == 0) & (t == 0)] = y[(d == 0) & (t == 0)] + g_hat_d0_t0 = {"preds": external_predictions["ml_g_d0_t0"], "targets": g_hat_d0_t0_targets, "models": None} else: g_hat_d0_t0 = _dml_cv_predict( self._learner["ml_g"], @@ -243,7 +245,9 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa g_hat_d0_t0["targets"] = g_hat_d0_t0["targets"].astype(float) g_hat_d0_t0["targets"][np.invert((d == 0) & (t == 0))] = np.nan if external_predictions["ml_g_d0_t1"] is not None: - g_hat_d0_t1 = {"preds": external_predictions["ml_g_d0_t1"], "targets": None, "models": None} + g_hat_d0_t1_targets = np.full_like(y, np.nan, dtype="float64") + g_hat_d0_t1_targets[(d == 0) & (t == 1)] = y[(d == 0) & (t == 1)] + g_hat_d0_t1 = {"preds": external_predictions["ml_g_d0_t1"], "targets": g_hat_d0_t1_targets, "models": None} else: g_hat_d0_t1 = _dml_cv_predict( self._learner["ml_g"], @@ -258,7 +262,9 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa g_hat_d0_t1["targets"] = g_hat_d0_t1["targets"].astype(float) g_hat_d0_t1["targets"][np.invert((d == 0) & (t == 1))] = np.nan if external_predictions["ml_g_d1_t0"] is not None: - g_hat_d1_t0 = {"preds": external_predictions["ml_g_d1_t0"], "targets": None, "models": None} + g_hat_d1_t0_targets = np.full_like(y, np.nan, dtype="float64") + g_hat_d1_t0_targets[(d == 1) & (t == 0)] = y[(d == 1) & (t == 0)] + g_hat_d1_t0 = {"preds": external_predictions["ml_g_d1_t0"], "targets": g_hat_d1_t0_targets, "models": None} else: g_hat_d1_t0 = _dml_cv_predict( self._learner["ml_g"], @@ -273,7 +279,9 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa g_hat_d1_t0["targets"] = g_hat_d1_t0["targets"].astype(float) g_hat_d1_t0["targets"][np.invert((d == 1) & (t == 0))] = np.nan if external_predictions["ml_g_d1_t1"] is not None: - g_hat_d1_t1 = {"preds": external_predictions["ml_g_d1_t1"], "targets": None, "models": None} + g_hat_d1_t1_targets = np.full_like(y, np.nan, dtype="float64") + 
g_hat_d1_t1_targets[(d == 1) & (t == 1)] = y[(d == 1) & (t == 1)] + g_hat_d1_t1 = {"preds": external_predictions["ml_g_d1_t1"], "targets": g_hat_d1_t1_targets, "models": None} else: g_hat_d1_t1 = _dml_cv_predict( self._learner["ml_g"], @@ -293,7 +301,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa if self.score == "observational": # nuisance m if external_predictions["ml_m"] is not None: - m_hat = {"preds": external_predictions["ml_m"], "targets": None, "models": None} + m_hat = {"preds": external_predictions["ml_m"], "targets": d, "models": None} else: m_hat = _dml_cv_predict( self._learner["ml_m"], diff --git a/doubleml/did/tests/test_did_cs_binary_vs_did_cs_panel.py b/doubleml/did/tests/test_did_cs_binary_vs_did_cs_panel.py new file mode 100644 index 00000000..8fab2615 --- /dev/null +++ b/doubleml/did/tests/test_did_cs_binary_vs_did_cs_panel.py @@ -0,0 +1,202 @@ +import math + +import numpy as np +import pytest +from sklearn.base import clone +from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor +from sklearn.linear_model import LinearRegression, LogisticRegression + +import doubleml as dml +from doubleml.did.datasets import make_did_CS2021 +from doubleml.did.utils._did_utils import _get_id_positions + + +@pytest.fixture( + scope="module", + params=[ + [LinearRegression(), LogisticRegression(solver="lbfgs", max_iter=250)], + [ + RandomForestRegressor(max_depth=5, n_estimators=10, random_state=42), + RandomForestClassifier(max_depth=5, n_estimators=10, random_state=42), + ], + ], +) +def learner(request): + return request.param + + +@pytest.fixture(scope="module", params=["observational", "experimental"]) +def score(request): + return request.param + + +@pytest.fixture(scope="module", params=[True, False]) +def in_sample_normalization(request): + return request.param + + +@pytest.fixture(scope="module", params=[0.1]) +def trimming_threshold(request): + return request.param + + +@pytest.fixture(scope="module", params=["datetime", "float"]) +def time_type(request): + return request.param + + +@pytest.fixture(scope="module") +def dml_did_binary_vs_did_fixture(time_type, learner, score, in_sample_normalization, trimming_threshold): + n_obs = 500 + dpg = 1 + + # collect data + df = make_did_CS2021(n_obs=n_obs, dgp_type=dpg, time_type=time_type) + dml_panel_data = dml.data.DoubleMLPanelData( + df, y_col="y", d_cols="d", id_col="id", t_col="t", x_cols=["Z1", "Z2", "Z3", "Z4"] + ) + + dml_args = { + "ml_g": clone(learner[0]), + "ml_m": clone(learner[1]), + "n_folds": 3, + "score": score, + "in_sample_normalization": in_sample_normalization, + "trimming_threshold": trimming_threshold, + "draw_sample_splitting": True, + } + + dml_did_binary_obj = dml.did.DoubleMLDIDCSBinary( + dml_panel_data, + g_value=dml_panel_data.g_values[0], + t_value_pre=dml_panel_data.t_values[0], + t_value_eval=dml_panel_data.t_values[1], + **dml_args, + ) + dml_did_binary_obj.fit() + + df_subset = dml_did_binary_obj.data_subset.copy() + dml_data = dml.data.DoubleMLData( + df_subset, y_col="y", d_cols="G_indicator", x_cols=["Z1", "Z2", "Z3", "Z4"], t_col="t_indicator" + ) + dml_did_obj = dml.DoubleMLDIDCS( + dml_data, + **dml_args, + ) + + # use external predictions (sample splitting is hard to synchronize) + ext_predictions = {"G_indicator": {}} + ext_predictions["G_indicator"]["ml_g_d0_t0"] = _get_id_positions( + dml_did_binary_obj.predictions["ml_g_d0_t0"][:, :, 0], dml_did_binary_obj._id_positions + ) + ext_predictions["G_indicator"]["ml_g_d0_t1"] = 
_get_id_positions( + dml_did_binary_obj.predictions["ml_g_d0_t1"][:, :, 0], dml_did_binary_obj._id_positions + ) + ext_predictions["G_indicator"]["ml_g_d1_t0"] = _get_id_positions( + dml_did_binary_obj.predictions["ml_g_d1_t0"][:, :, 0], dml_did_binary_obj._id_positions + ) + ext_predictions["G_indicator"]["ml_g_d1_t1"] = _get_id_positions( + dml_did_binary_obj.predictions["ml_g_d1_t1"][:, :, 0], dml_did_binary_obj._id_positions + ) + if score == "observational": + ext_predictions["G_indicator"]["ml_m"] = _get_id_positions( + dml_did_binary_obj.predictions["ml_m"][:, :, 0], dml_did_binary_obj._id_positions + ) + dml_did_obj.fit(external_predictions=ext_predictions) + + res_dict = { + "coef": dml_did_obj.coef, + "coef_binary": dml_did_binary_obj.coef, + "se": dml_did_obj.se, + "se_binary": dml_did_binary_obj.se, + "nuisance_loss": dml_did_obj.nuisance_loss, + "nuisance_loss_binary": dml_did_binary_obj.nuisance_loss, + "dml_did_binary_obj": dml_did_binary_obj, + } + + # sensitivity tests + res_dict["sensitivity_elements"] = dml_did_obj.sensitivity_elements + res_dict["sensitivity_elements_binary"] = dml_did_binary_obj.sensitivity_elements + + dml_did_obj.sensitivity_analysis() + dml_did_binary_obj.sensitivity_analysis() + + res_dict["sensitivity_params"] = dml_did_obj.sensitivity_params + res_dict["sensitivity_params_binary"] = dml_did_binary_obj.sensitivity_params + + return res_dict + + +@pytest.mark.ci +def test_coefs(dml_did_binary_vs_did_fixture): + assert math.isclose( + dml_did_binary_vs_did_fixture["coef_binary"][0], dml_did_binary_vs_did_fixture["coef"][0], rel_tol=1e-9, abs_tol=1e-4 + ) + + +@pytest.mark.ci +def test_ses(dml_did_binary_vs_did_fixture): + assert math.isclose( + dml_did_binary_vs_did_fixture["se_binary"][0], dml_did_binary_vs_did_fixture["se"][0], rel_tol=1e-9, abs_tol=1e-4 + ) + + +# No Boostrap Tests as the observations are not ordered in the same way + + +@pytest.mark.ci +def test_nuisance_loss(dml_did_binary_vs_did_fixture): + assert ( + dml_did_binary_vs_did_fixture["nuisance_loss"].keys() == dml_did_binary_vs_did_fixture["nuisance_loss_binary"].keys() + ) + for key, value in dml_did_binary_vs_did_fixture["nuisance_loss"].items(): + assert np.allclose(value, dml_did_binary_vs_did_fixture["nuisance_loss_binary"][key], rtol=1e-9, atol=1e-3) + + +@pytest.mark.ci +def test_sensitivity_elements(dml_did_binary_vs_did_fixture): + sensitivity_element_names = ["sigma2", "nu2"] + for sensitivity_element in sensitivity_element_names: + assert np.allclose( + dml_did_binary_vs_did_fixture["sensitivity_elements"][sensitivity_element], + dml_did_binary_vs_did_fixture["sensitivity_elements_binary"][sensitivity_element], + rtol=1e-9, + atol=1e-4, + ) + for sensitivity_element in ["psi_sigma2", "psi_nu2", "riesz_rep"]: + dml_binary_obj = dml_did_binary_vs_did_fixture["dml_did_binary_obj"] + scaling = dml_binary_obj.n_obs_subset / dml_binary_obj._dml_data.n_obs + binary_sensitivity_element = scaling * _get_id_positions( + dml_did_binary_vs_did_fixture["sensitivity_elements_binary"][sensitivity_element], dml_binary_obj._id_positions + ) + assert np.allclose( + dml_did_binary_vs_did_fixture["sensitivity_elements"][sensitivity_element], + binary_sensitivity_element, + rtol=1e-9, + atol=1e-4, + ) + + +@pytest.mark.ci +def test_sensitivity_params(dml_did_binary_vs_did_fixture): + for key in ["theta", "se", "ci"]: + assert np.allclose( + dml_did_binary_vs_did_fixture["sensitivity_params"][key]["lower"], + dml_did_binary_vs_did_fixture["sensitivity_params_binary"][key]["lower"], + 
rtol=1e-9, + atol=1e-4, + ) + assert np.allclose( + dml_did_binary_vs_did_fixture["sensitivity_params"][key]["upper"], + dml_did_binary_vs_did_fixture["sensitivity_params_binary"][key]["upper"], + rtol=1e-9, + atol=1e-4, + ) + + for key in ["rv", "rva"]: + assert np.allclose( + dml_did_binary_vs_did_fixture["sensitivity_params"][key], + dml_did_binary_vs_did_fixture["sensitivity_params_binary"][key], + rtol=1e-9, + atol=1e-4, + ) From 0a46b5966a993111b66405a069af64b6969d019e Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Wed, 11 Jun 2025 11:00:54 +0200 Subject: [PATCH 45/84] add panel type to did multi --- doubleml/did/did_multi.py | 38 +++- .../did/tests/test_did_multi_vs_binary.py | 2 +- .../did/tests/test_did_multi_vs_cs_binary.py | 208 ++++++++++++++++++ 3 files changed, 238 insertions(+), 10 deletions(-) create mode 100644 doubleml/did/tests/test_did_multi_vs_cs_binary.py diff --git a/doubleml/did/did_multi.py b/doubleml/did/did_multi.py index 8c5d5163..c8f54313 100644 --- a/doubleml/did/did_multi.py +++ b/doubleml/did/did_multi.py @@ -12,6 +12,7 @@ from doubleml.data import DoubleMLPanelData from doubleml.did.did_aggregation import DoubleMLDIDAggregation from doubleml.did.did_binary import DoubleMLDIDBinary +from doubleml.did.did_cs_binary import DoubleMLDIDCSBinary from doubleml.did.utils._aggregation import ( _check_did_aggregation_dict, _compute_did_eventstudy_aggregation_weights, @@ -31,7 +32,7 @@ from doubleml.did.utils._plot import add_jitter from doubleml.double_ml import DoubleML from doubleml.double_ml_framework import concat -from doubleml.utils._checks import _check_score, _check_trimming +from doubleml.utils._checks import _check_bool, _check_score, _check_trimming from doubleml.utils._descriptive import generate_summary from doubleml.utils.gain_statistics import gain_statistics @@ -80,6 +81,10 @@ class DoubleMLDIDMulti: from the pretreatment covariates. Default is ``'observational'``. + panel : bool + Indicates whether to rely on panel data structure (``True``) or repeated cross sections (``False``). + Default is ``True``. + in_sample_normalization : bool Indicates whether to use in-sample normalization of weights. Default is ``True``. @@ -140,6 +145,7 @@ def __init__( n_folds=5, n_rep=1, score="observational", + panel=True, in_sample_normalization=True, trimming_rule="truncate", trimming_threshold=1e-2, @@ -179,6 +185,9 @@ def __init__( valid_scores = ["observational", "experimental"] _check_score(self.score, valid_scores, allow_callable=False) + _check_bool(panel, "panel") + self._panel = panel + # initialize framework which is constructed after the fit method is called self._framework = None @@ -332,6 +341,13 @@ def never_treated_value(self): """ return self._never_treated_value + @property + def panel(self): + """ + Indicates whether to rely on panel data structure (``True``) or repeated cross sections (``False``). + """ + return self._panel + @property def in_sample_normalization(self): """ @@ -1250,7 +1266,10 @@ def _check_external_predictions(self, external_predictions): + f"Passed keys: {set(external_predictions.keys())}." 
) - expected_learner_keys = ["ml_g0", "ml_g1", "ml_m"] + if self.panel: + expected_learner_keys = ["ml_g0", "ml_g1", "ml_m"] + else: + expected_learner_keys = ["ml_g_d0_t0", "ml_g_d0_t1", "ml_g_d1_t0", "ml_g_d1_t1", "ml_m"] for key, value in external_predictions.items(): if not isinstance(value, dict): raise TypeError( @@ -1268,12 +1287,7 @@ def _rename_external_predictions(self, external_predictions): d_col = self._dml_data.d_cols[0] ext_pred_dict = {gt_combination: {d_col: {}} for gt_combination in self.gt_labels} for gt_combination in self.gt_labels: - if "ml_g0" in external_predictions[gt_combination]: - ext_pred_dict[gt_combination][d_col]["ml_g0"] = external_predictions[gt_combination]["ml_g0"] - if "ml_g1" in external_predictions[gt_combination]: - ext_pred_dict[gt_combination][d_col]["ml_g1"] = external_predictions[gt_combination]["ml_g1"] - if "ml_m" in external_predictions[gt_combination]: - ext_pred_dict[gt_combination][d_col]["ml_m"] = external_predictions[gt_combination]["ml_m"] + ext_pred_dict[gt_combination][d_col].update(external_predictions[gt_combination]) return ext_pred_dict @@ -1304,9 +1318,15 @@ def _initialize_models(self): "draw_sample_splitting": True, "print_periods": self._print_periods, } + if self.panel: + ModelClass = DoubleMLDIDBinary + else: + ModelClass = DoubleMLDIDCSBinary + + # iterate over all group-time combinations for i_model, (g_value, t_value_pre, t_value_eval) in enumerate(self.gt_combinations): # initialize models for all levels - model = DoubleMLDIDBinary(g_value=g_value, t_value_pre=t_value_pre, t_value_eval=t_value_eval, **kwargs) + model = ModelClass(g_value=g_value, t_value_pre=t_value_pre, t_value_eval=t_value_eval, **kwargs) modellist[i_model] = model diff --git a/doubleml/did/tests/test_did_multi_vs_binary.py b/doubleml/did/tests/test_did_multi_vs_binary.py index 40b877b2..15d3fd0c 100644 --- a/doubleml/did/tests/test_did_multi_vs_binary.py +++ b/doubleml/did/tests/test_did_multi_vs_binary.py @@ -49,7 +49,7 @@ def dml_did_binary_vs_did_multi_fixture(time_type, learner, score, in_sample_nor n_obs = 500 dpg = 1 boot_methods = ["normal"] - n_rep_boot = 50000 + n_rep_boot = 500 # collect data df = make_did_CS2021(n_obs=n_obs, dgp_type=dpg, time_type=time_type) diff --git a/doubleml/did/tests/test_did_multi_vs_cs_binary.py b/doubleml/did/tests/test_did_multi_vs_cs_binary.py new file mode 100644 index 00000000..59886854 --- /dev/null +++ b/doubleml/did/tests/test_did_multi_vs_cs_binary.py @@ -0,0 +1,208 @@ +import math + +import numpy as np +import pytest +from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor +from sklearn.linear_model import LinearRegression, LogisticRegression + +import doubleml as dml +from doubleml.did.datasets import make_did_CS2021 +from doubleml.utils import DMLDummyClassifier, DMLDummyRegressor + + +@pytest.fixture( + scope="module", + params=[ + [LinearRegression(), LogisticRegression(solver="lbfgs", max_iter=250)], + [ + RandomForestRegressor(max_depth=5, n_estimators=10, random_state=42), + RandomForestClassifier(max_depth=5, n_estimators=10, random_state=42), + ], + ], +) +def learner(request): + return request.param + + +@pytest.fixture(scope="module", params=["observational", "experimental"]) +def score(request): + return request.param + + +@pytest.fixture(scope="module", params=[True, False]) +def in_sample_normalization(request): + return request.param + + +@pytest.fixture(scope="module", params=[0.1]) +def trimming_threshold(request): + return request.param + + 
+@pytest.fixture(scope="module", params=["datetime", "float"]) +def time_type(request): + return request.param + + +@pytest.fixture(scope="module") +def dml_did_binary_vs_did_multi_fixture(time_type, learner, score, in_sample_normalization, trimming_threshold): + n_obs = 500 + dpg = 1 + boot_methods = ["normal"] + n_rep_boot = 500 + + # collect data + df = make_did_CS2021(n_obs=n_obs, dgp_type=dpg, time_type=time_type) + dml_panel_data = dml.data.DoubleMLPanelData( + df, y_col="y", d_cols="d", id_col="id", t_col="t", x_cols=["Z1", "Z2", "Z3", "Z4"] + ) + + dml_args = { + "n_folds": 3, + "score": score, + "in_sample_normalization": in_sample_normalization, + "trimming_threshold": trimming_threshold, + "draw_sample_splitting": True, + } + gt_combination = [(dml_panel_data.g_values[0], dml_panel_data.t_values[0], dml_panel_data.t_values[1])] + dml_did_multi_obj = dml.did.DoubleMLDIDMulti( + dml_panel_data, + ml_g=learner[0], + ml_m=learner[1], + gt_combinations=gt_combination, + panel=False, + **dml_args, + ) + dml_did_multi_obj.fit() + + treatment_col = dml_panel_data.d_cols[0] + ext_pred_dict = {treatment_col: {}} + all_keys = ["ml_g_d0_t0", "ml_g_d0_t1", "ml_g_d1_t0", "ml_g_d1_t1"] + for key in all_keys: + ext_pred_dict["d"][key] = dml_did_multi_obj.modellist[0].predictions[key][:, :, 0] + if score == "observational": + ext_pred_dict[treatment_col]["ml_m"] = dml_did_multi_obj.modellist[0].predictions["ml_m"][:, :, 0] + + dml_did_binary_obj = dml.did.DoubleMLDIDCSBinary( + dml_panel_data, + g_value=gt_combination[0][0], + t_value_pre=gt_combination[0][1], + t_value_eval=gt_combination[0][2], + ml_g=DMLDummyRegressor(), + ml_m=DMLDummyClassifier(), + **dml_args, + ) + dml_did_binary_obj.fit(external_predictions=ext_pred_dict) + + res_dict = { + "coef_multi": dml_did_multi_obj.coef, + "coef_binary": dml_did_binary_obj.coef, + "se_multi": dml_did_multi_obj.se, + "se_binary": dml_did_binary_obj.se, + "boot_methods": boot_methods, + "nuisance_loss_multi": dml_did_multi_obj.nuisance_loss, + "nuisance_loss_binary": dml_did_binary_obj.nuisance_loss, + } + + for bootstrap in boot_methods: + np.random.seed(3141) + dml_did_multi_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot) + np.random.seed(3141) + dml_did_binary_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot) + + # approximately same ci (bootstrap not identical due to size of score) + res_dict["boot_ci" + bootstrap + "_multi"] = dml_did_multi_obj.confint(joint=True) + res_dict["boot_ci" + bootstrap + "_binary"] = dml_did_binary_obj.confint(joint=True) + + # sensitivity tests + res_dict["sensitivity_elements_multi"] = dml_did_multi_obj.sensitivity_elements + res_dict["sensitivity_elements_binary"] = dml_did_binary_obj.framework.sensitivity_elements + + dml_did_multi_obj.sensitivity_analysis() + dml_did_binary_obj.sensitivity_analysis() + + res_dict["sensitivity_params_multi"] = dml_did_multi_obj.sensitivity_params + res_dict["sensitivity_params_binary"] = dml_did_binary_obj.sensitivity_params + + return res_dict + + +@pytest.mark.ci +def test_coefs(dml_did_binary_vs_did_multi_fixture): + assert math.isclose( + dml_did_binary_vs_did_multi_fixture["coef_binary"][0], + dml_did_binary_vs_did_multi_fixture["coef_multi"][0], + rel_tol=1e-9, + abs_tol=1e-4, + ) + + +@pytest.mark.ci +def test_se(dml_did_binary_vs_did_multi_fixture): + assert math.isclose( + dml_did_binary_vs_did_multi_fixture["se_binary"][0], + dml_did_binary_vs_did_multi_fixture["se_multi"][0], + rel_tol=1e-9, + abs_tol=1e-4, + ) + + +@pytest.mark.ci +def 
test_boot(dml_did_binary_vs_did_multi_fixture): + for bootstrap in dml_did_binary_vs_did_multi_fixture["boot_methods"]: + assert np.allclose( + dml_did_binary_vs_did_multi_fixture["boot_ci" + bootstrap + "_multi"].values, + dml_did_binary_vs_did_multi_fixture["boot_ci" + bootstrap + "_binary"].values, + atol=1e-2, + ) + + +@pytest.mark.ci +def test_nuisance_loss(dml_did_binary_vs_did_multi_fixture): + assert ( + dml_did_binary_vs_did_multi_fixture["nuisance_loss_multi"].keys() + == dml_did_binary_vs_did_multi_fixture["nuisance_loss_binary"].keys() + ) + for key, value in dml_did_binary_vs_did_multi_fixture["nuisance_loss_multi"].items(): + assert np.allclose(value, dml_did_binary_vs_did_multi_fixture["nuisance_loss_binary"][key], rtol=1e-9, atol=1e-3) + + +@pytest.mark.ci +def test_sensitivity_elements(dml_did_binary_vs_did_multi_fixture): + elements_multi = dml_did_binary_vs_did_multi_fixture["sensitivity_elements_multi"] + elements_binary = dml_did_binary_vs_did_multi_fixture["sensitivity_elements_binary"] + sensitivity_element_names = ["max_bias", "psi_max_bias", "sigma2", "nu2"] + for sensitivity_element in sensitivity_element_names: + assert np.allclose( + elements_multi[sensitivity_element], + elements_binary[sensitivity_element], + rtol=1e-9, + atol=1e-4, + ) + + +@pytest.mark.ci +def test_sensitivity_params(dml_did_binary_vs_did_multi_fixture): + multi_params = dml_did_binary_vs_did_multi_fixture["sensitivity_params_multi"] + binary_params = dml_did_binary_vs_did_multi_fixture["sensitivity_params_binary"] + for key in ["theta", "se", "ci"]: + assert np.allclose( + multi_params[key]["lower"], + binary_params[key]["lower"], + rtol=1e-9, + atol=1e-4, + ) + assert np.allclose( + multi_params[key]["upper"], + binary_params[key]["upper"], + rtol=1e-9, + atol=1e-4, + ) + + for key in ["rv", "rva"]: + assert np.allclose( + multi_params[key], + binary_params[key], + rtol=1e-9, + atol=1e-4, + ) From 45dfcf5a7fe98b1d700a21426ce27e81159b3985 Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Wed, 11 Jun 2025 11:47:11 +0200 Subject: [PATCH 46/84] update single gt tests for did_cs --- .../did/tests/test_did_multi_aggregation_single_gt.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/doubleml/did/tests/test_did_multi_aggregation_single_gt.py b/doubleml/did/tests/test_did_multi_aggregation_single_gt.py index 0f71d91b..a6ffcd49 100644 --- a/doubleml/did/tests/test_did_multi_aggregation_single_gt.py +++ b/doubleml/did/tests/test_did_multi_aggregation_single_gt.py @@ -27,6 +27,11 @@ def score(request): return request.param +@pytest.fixture(scope="module", params=[True, False]) +def panel(request): + return request.param + + @pytest.fixture(scope="module", params=[True, False]) def in_sample_normalization(request): return request.param @@ -43,7 +48,7 @@ def time_type(request): @pytest.fixture(scope="module") -def dml_single_gt_aggregation(aggregation, time_type, learner, score, in_sample_normalization, trimming_threshold): +def dml_single_gt_aggregation(aggregation, time_type, learner, score, panel, in_sample_normalization, trimming_threshold): n_obs = 500 dpg = 1 @@ -56,6 +61,7 @@ def dml_single_gt_aggregation(aggregation, time_type, learner, score, in_sample_ dml_args = { "n_folds": 3, "score": score, + "panel": panel, "in_sample_normalization": in_sample_normalization, "trimming_threshold": trimming_threshold, "draw_sample_splitting": True, From 29b0ee7114c7d1a99c5c22558c84f6a62e0d3403 Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Wed, 11 Jun 2025 11:47:35 +0200 
Subject: [PATCH 47/84] update exception tests for did cs --- doubleml/did/tests/test_did_multi_exceptions.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/doubleml/did/tests/test_did_multi_exceptions.py b/doubleml/did/tests/test_did_multi_exceptions.py index aead8e48..88d373e3 100644 --- a/doubleml/did/tests/test_did_multi_exceptions.py +++ b/doubleml/did/tests/test_did_multi_exceptions.py @@ -18,6 +18,7 @@ "ml_g": LinearRegression(), "ml_m": LogisticRegression(), "gt_combinations": [(1, 0, 1)], + "panel": True, } @@ -43,6 +44,12 @@ def test_input(): invalid_arguments = {"control_group": 0} _ = dml.did.DoubleMLDIDMulti(**(valid_arguments | invalid_arguments)) + # non boolean panel + msg = "panel has to be boolean. test of type was passed." + with pytest.raises(TypeError, match=msg): + invalid_arguments = {"panel": "test"} + _ = dml.did.DoubleMLDIDMulti(**(valid_arguments | invalid_arguments)) + # propensity score adjustments msg = "in_sample_normalization indicator has to be boolean. Object of type passed." with pytest.raises(TypeError, match=msg): @@ -170,6 +177,12 @@ def test_check_external_predictions(): valid_pred = {model.gt_labels[0]: {"ml_g0": None, "ml_g1": None, "ml_m": None}} model._check_external_predictions(valid_pred) + model_cs = dml.did.DoubleMLDIDMulti(**valid_arguments | {"panel": False}) + valid_pred = { + model.gt_labels[0]: {"ml_g_d0_t0": None, "ml_g_d0_t1": None, "ml_g_d1_t0": None, "ml_g_d1_t1": None, "ml_m": None} + } + model_cs._check_external_predictions(valid_pred) + @pytest.mark.ci def test_exceptions_before_fit(): From 895a7627d2d3c444cb900cacfee5a0f475514556 Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Wed, 11 Jun 2025 11:47:50 +0200 Subject: [PATCH 48/84] update external prediction tests for did cs --- .../tests/test_did_multi_external_predictions.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/doubleml/did/tests/test_did_multi_external_predictions.py b/doubleml/did/tests/test_did_multi_external_predictions.py index e336487d..9bafdc6f 100644 --- a/doubleml/did/tests/test_did_multi_external_predictions.py +++ b/doubleml/did/tests/test_did_multi_external_predictions.py @@ -14,6 +14,11 @@ def did_score(request): return request.param +@pytest.fixture(scope="module", params=[True, False]) +def panel(request): + return request.param + + @pytest.fixture(scope="module", params=[1, 3]) def n_rep(request): return request.param @@ -30,7 +35,7 @@ def set_ml_g_ext(request): @pytest.fixture(scope="module") -def doubleml_did_multi_ext_fixture(did_score, n_rep, set_ml_m_ext, set_ml_g_ext): +def doubleml_did_multi_ext_fixture(did_score, panel, n_rep, set_ml_m_ext, set_ml_g_ext): n_obs = 500 n_folds = 5 dgp = 1 @@ -47,6 +52,7 @@ def doubleml_did_multi_ext_fixture(did_score, n_rep, set_ml_m_ext, set_ml_g_ext) "obj_dml_data": dml_panel_data, "gt_combinations": [(2, 0, 1)], "score": did_score, + "panel": panel, "n_rep": n_rep, "n_folds": n_folds, } @@ -69,9 +75,12 @@ def doubleml_did_multi_ext_fixture(did_score, n_rep, set_ml_m_ext, set_ml_g_ext) ml_m_ext = ml_m if set_ml_g_ext: + g_keys = ["ml_g0", "ml_g1"] if panel else ["ml_g_d0_t0", "ml_g_d0_t1", "ml_g_d1_t0", "ml_g_d1_t1"] for i_gt_combination, gt_label in enumerate(dml_obj.gt_labels): - ext_pred_dict[gt_label]["ml_g0"] = dml_obj.modellist[i_gt_combination].predictions["ml_g0"][:, :, 0] - ext_pred_dict[gt_label]["ml_g1"] = dml_obj.modellist[i_gt_combination].predictions["ml_g1"][:, :, 0] + predictions = dml_obj.modellist[i_gt_combination].predictions + for key in 
g_keys: + ext_pred_dict[gt_label][key] = predictions[key][:, :, 0] + ml_g_ext = DMLDummyRegressor() else: ml_g_ext = ml_g From b6ace7dae151340d2b2462e9a0d64af6d0b7ce0e Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Wed, 11 Jun 2025 11:48:02 +0200 Subject: [PATCH 49/84] update placebo tests for did cs multi --- doubleml/did/tests/test_did_multi_placebo.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/doubleml/did/tests/test_did_multi_placebo.py b/doubleml/did/tests/test_did_multi_placebo.py index 8f01d426..12435871 100644 --- a/doubleml/did/tests/test_did_multi_placebo.py +++ b/doubleml/did/tests/test_did_multi_placebo.py @@ -12,13 +12,18 @@ def did_score(request): return request.param +@pytest.fixture(scope="module", params=[True, False]) +def panel(request): + return request.param + + @pytest.fixture(scope="module", params=[1, 3]) def n_rep(request): return request.param @pytest.fixture(scope="module") -def doubleml_did_fixture(did_score, n_rep): +def doubleml_did_fixture(did_score, panel, n_rep): n_obs = 1000 dgp = 5 # has to be experimental (for experimental score to be valid) np.random.seed(42) @@ -36,6 +41,7 @@ def doubleml_did_fixture(did_score, n_rep): "ml_m": LogisticRegression(), "gt_combinations": gt_combinations, "score": did_score, + "panel": panel, "n_rep": n_rep, "n_folds": 5, "draw_sample_splitting": True, From 176a99d8b3ac2dd8728a5a08368be3d974f23e86 Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Wed, 11 Jun 2025 13:34:06 +0200 Subject: [PATCH 50/84] update plot and return type tests for did multi --- doubleml/did/did_multi.py | 5 ++ doubleml/did/tests/test_did_multi_plot.py | 8 +++- .../did/tests/test_did_multi_return_types.py | 47 ++++++++++++++----- 3 files changed, 47 insertions(+), 13 deletions(-) diff --git a/doubleml/did/did_multi.py b/doubleml/did/did_multi.py index c8f54313..646ad41d 100644 --- a/doubleml/did/did_multi.py +++ b/doubleml/did/did_multi.py @@ -187,6 +187,11 @@ def __init__( _check_bool(panel, "panel") self._panel = panel + # set score dim (n_elements, n_thetas, n_rep), just for checking purposes + if self.panel: + self._score_dim = (self._dml_data.n_ids, self.n_gt_atts, self.n_rep) + else: + self._score_dim = (self._dml_data.n_obs, self.n_gt_atts, self.n_rep) # initialize framework which is constructed after the fit method is called self._framework = None diff --git a/doubleml/did/tests/test_did_multi_plot.py b/doubleml/did/tests/test_did_multi_plot.py index 2eb15dcc..bcb8b786 100644 --- a/doubleml/did/tests/test_did_multi_plot.py +++ b/doubleml/did/tests/test_did_multi_plot.py @@ -13,13 +13,18 @@ def did_score(request): return request.param +@pytest.fixture(scope="module", params=[True, False]) +def panel(request): + return request.param + + @pytest.fixture(scope="module", params=[1, 3]) def n_rep(request): return request.param @pytest.fixture(scope="module") -def doubleml_did_fixture(did_score, n_rep): +def doubleml_did_fixture(did_score, panel, n_rep): n_obs = 1000 dgp = 5 # has to be experimental (for experimental score to be valid) np.random.seed(42) @@ -32,6 +37,7 @@ def doubleml_did_fixture(did_score, n_rep): "ml_m": LogisticRegression(), "gt_combinations": "all", "score": did_score, + "panel": panel, "n_rep": n_rep, "n_folds": 2, "draw_sample_splitting": True, diff --git a/doubleml/did/tests/test_did_multi_return_types.py b/doubleml/did/tests/test_did_multi_return_types.py index c11544ed..d797230e 100644 --- a/doubleml/did/tests/test_did_multi_return_types.py +++ 
b/doubleml/did/tests/test_did_multi_return_types.py @@ -13,7 +13,7 @@ from doubleml.double_ml_framework import DoubleMLFramework # Test constants -N_OBS = 200 +N_IDS = 200 N_REP = 1 N_FOLDS = 3 N_REP_BOOT = 314 @@ -31,7 +31,7 @@ datasets = {} # panel data -df_panel = make_did_CS2021(n_obs=N_OBS, dgp_type=1, n_pre_treat_periods=2, n_periods=N_PERIODS, time_type="float") +df_panel = make_did_CS2021(n_obs=N_IDS, dgp_type=1, n_pre_treat_periods=2, n_periods=N_PERIODS, time_type="float") df_panel["y_binary"] = np.random.binomial(n=1, p=0.5, size=df_panel.shape[0]) datasets["did_panel"] = DoubleMLPanelData( df_panel, y_col="y", d_cols="d", id_col="id", t_col="t", x_cols=["Z1", "Z2", "Z3", "Z4"] @@ -42,10 +42,23 @@ dml_objs = [ - (DoubleMLDIDMulti(datasets["did_panel"], ml_g=Lasso(), ml_m=LogisticRegression(), **dml_args), DoubleMLDIDMulti), + ( + DoubleMLDIDMulti(datasets["did_panel"], panel=True, ml_g=Lasso(), ml_m=LogisticRegression(), **dml_args), + DoubleMLDIDMulti, + ), + ( + DoubleMLDIDMulti(datasets["did_panel"], panel=False, ml_g=Lasso(), ml_m=LogisticRegression(), **dml_args), + DoubleMLDIDMulti, + ), + ( + DoubleMLDIDMulti( + datasets["did_panel_binary_outcome"], panel=True, ml_g=LogisticRegression(), ml_m=LogisticRegression(), **dml_args + ), + DoubleMLDIDMulti, + ), ( DoubleMLDIDMulti( - datasets["did_panel_binary_outcome"], ml_g=LogisticRegression(), ml_m=LogisticRegression(), **dml_args + datasets["did_panel_binary_outcome"], panel=False, ml_g=LogisticRegression(), ml_m=LogisticRegression(), **dml_args ), DoubleMLDIDMulti, ), @@ -84,13 +97,20 @@ def test_panel_property_types_and_shapes(fitted_dml_obj): n_treat = len(fitted_dml_obj.gt_combinations) dml_obj = fitted_dml_obj + if dml_obj.panel: + score_dim = (N_IDS, n_treat, N_REP) + else: + score_dim = (df_panel.shape[0], n_treat, N_REP) + + assert dml_obj._score_dim == score_dim + # check_basic_property_types_and_shapes # check that the setting is still in line with the hard-coded values assert dml_obj._dml_data.n_treat == 1 assert dml_obj.n_gt_atts == n_treat assert dml_obj.n_rep == N_REP assert dml_obj.n_folds == N_FOLDS - assert dml_obj._dml_data.n_obs == N_OBS * N_PERIODS + assert dml_obj._dml_data.n_obs == df_panel.shape[0] assert dml_obj.n_rep_boot == N_REP_BOOT assert isinstance(dml_obj.all_coef, np.ndarray) @@ -112,11 +132,7 @@ def test_panel_property_types_and_shapes(fitted_dml_obj): assert dml_obj.t_stat.shape == (n_treat,) assert isinstance(dml_obj.framework.scaled_psi, np.ndarray) - assert dml_obj.framework.scaled_psi.shape == ( - N_OBS, - n_treat, - N_REP, - ) + assert dml_obj.framework.scaled_psi.shape == score_dim assert isinstance(dml_obj.framework, DoubleMLFramework) assert isinstance(dml_obj.pval, np.ndarray) @@ -126,7 +142,10 @@ def test_panel_property_types_and_shapes(fitted_dml_obj): assert len(dml_obj._dml_data.binary_treats) == 1 # check_basic_predictions_and_targets - expected_keys = ["ml_g0", "ml_g1", "ml_m"] + if dml_obj.panel: + expected_keys = ["ml_g0", "ml_g1", "ml_m"] + else: + expected_keys = ["ml_g_d0_t0", "ml_g_d0_t1", "ml_g_d1_t0", "ml_g_d1_t1", "ml_m"] for key in expected_keys: assert isinstance(dml_obj.nuisance_loss[key], np.ndarray) assert dml_obj.nuisance_loss[key].shape == (N_REP, n_treat) @@ -137,6 +156,10 @@ def test_panel_sensitivity_return_types(fitted_dml_obj): n_treat = len(fitted_dml_obj.gt_combinations) benchmarking_set = [fitted_dml_obj._dml_data.x_cols[0]] dml_obj = fitted_dml_obj + if dml_obj.panel: + score_dim = (N_IDS, n_treat, N_REP) + else: + score_dim = (df_panel.shape[0], 
n_treat, N_REP) assert isinstance(dml_obj.sensitivity_elements, dict) for key in ["sigma2", "nu2", "max_bias"]: @@ -144,7 +167,7 @@ def test_panel_sensitivity_return_types(fitted_dml_obj): assert dml_obj.sensitivity_elements[key].shape == (1, n_treat, N_REP) for key in ["psi_max_bias"]: assert isinstance(dml_obj.sensitivity_elements[key], np.ndarray) - assert dml_obj.sensitivity_elements[key].shape == (N_OBS, n_treat, N_REP) + assert dml_obj.sensitivity_elements[key].shape == score_dim assert isinstance(dml_obj.sensitivity_summary, str) dml_obj.sensitivity_analysis() From 9d59e5b1e378a2aa9c573b92aad02267cf3da586 Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Wed, 11 Jun 2025 13:53:45 +0200 Subject: [PATCH 51/84] add additional did multi aggregation test --- ...st_did_multi_aggregation_manual_weights.py | 199 +++++++++++++++++- ...test_did_multi_aggregation_weight_index.py | 1 - 2 files changed, 198 insertions(+), 2 deletions(-) delete mode 100644 doubleml/did/tests/test_did_multi_aggregation_weight_index.py diff --git a/doubleml/did/tests/test_did_multi_aggregation_manual_weights.py b/doubleml/did/tests/test_did_multi_aggregation_manual_weights.py index 35512d8f..57b00b31 100644 --- a/doubleml/did/tests/test_did_multi_aggregation_manual_weights.py +++ b/doubleml/did/tests/test_did_multi_aggregation_manual_weights.py @@ -1 +1,198 @@ -# TODO: For each aggregation method check if the manual weights equal the string aggregation method. +import math + +import numpy as np +import pytest +from sklearn.linear_model import LinearRegression, LogisticRegression + +import doubleml as dml +from doubleml.did.datasets import make_did_CS2021 +from doubleml.did.utils._aggregation import ( + _compute_did_eventstudy_aggregation_weights, + _compute_did_group_aggregation_weights, + _compute_did_time_aggregation_weights, +) + + +@pytest.fixture(scope="module", params=["group", "time", "eventstudy"]) +def aggregation_method(request): + return request.param + + +@pytest.fixture(scope="module", params=[True, False]) +def panel(request): + return request.param + + +@pytest.fixture(scope="module", params=["observational", "experimental"]) +def score(request): + return request.param + + +@pytest.fixture(scope="module") +def dml_fitted_obj(panel, score): + """Create a fitted DML object for testing.""" + n_obs = 200 + + # Create data + df = make_did_CS2021(n_obs=n_obs, dgp_type=1, time_type="float") + dml_data = dml.data.DoubleMLPanelData(df, y_col="y", d_cols="d", id_col="id", t_col="t", x_cols=["Z1", "Z2", "Z3", "Z4"]) + + # Create and fit model + ml_g = LinearRegression() + ml_m = LogisticRegression(solver="lbfgs", max_iter=250) + + dml_obj = dml.did.DoubleMLDIDMulti( + obj_dml_data=dml_data, + ml_g=ml_g, + ml_m=ml_m, + gt_combinations="standard", + panel=panel, + score=score, + n_folds=3, + n_rep=1, + ) + dml_obj.fit() + + return dml_obj + + +def _extract_manual_weights(dml_obj, aggregation_method): + """Extract manual weights from the aggregation method.""" + # Get the mask for non-masked values + selected_gt_mask = ~dml_obj.gt_index.mask + + if aggregation_method == "group": + # Exclude pre-treatment combinations for group aggregation + selected_gt_mask = selected_gt_mask & dml_obj._post_treatment_mask + aggregation_dict = _compute_did_group_aggregation_weights( + gt_index=dml_obj.gt_index, + g_values=dml_obj.g_values, + d_values=dml_obj._dml_data.d, + selected_gt_mask=selected_gt_mask, + ) + aggregation_dict["method"] = "Group" + elif aggregation_method == "time": + # Exclude pre-treatment combinations for 
time aggregation + selected_gt_mask = selected_gt_mask & dml_obj._post_treatment_mask + aggregation_dict = _compute_did_time_aggregation_weights( + gt_index=dml_obj.gt_index, + g_values=dml_obj.g_values, + t_values=dml_obj.t_values, + d_values=dml_obj._dml_data.d, + selected_gt_mask=selected_gt_mask, + ) + aggregation_dict["method"] = "Time" + else: + assert aggregation_method == "eventstudy" + aggregation_dict = _compute_did_eventstudy_aggregation_weights( + gt_index=dml_obj.gt_index, + g_values=dml_obj.g_values, + t_values=dml_obj.t_values, + d_values=dml_obj._dml_data.d, + time_values=dml_obj._dml_data.t, + selected_gt_mask=selected_gt_mask, + ) + aggregation_dict["method"] = "Event Study" + return aggregation_dict + + +@pytest.mark.ci +def test_string_vs_manual_weights_aggregation(dml_fitted_obj, aggregation_method): + """Test that string aggregation methods produce identical results to manual weights.""" + + # Get string-based aggregation result + agg_string = dml_fitted_obj.aggregate(aggregation=aggregation_method) + + # Extract manual weights + manual_weights_dict = _extract_manual_weights(dml_fitted_obj, aggregation_method) + + # Get manual aggregation result + agg_manual = dml_fitted_obj.aggregate(aggregation=manual_weights_dict) + + # Compare aggregated frameworks - coefficients + np.testing.assert_allclose( + agg_string.aggregated_frameworks.thetas, + agg_manual.aggregated_frameworks.thetas, + rtol=1e-9, + atol=1e-12, + ) + + # Compare aggregated frameworks - standard errors + np.testing.assert_allclose( + agg_string.aggregated_frameworks.ses, + agg_manual.aggregated_frameworks.ses, + rtol=1e-9, + atol=1e-12, + ) + + # Compare overall aggregated framework - coefficients + np.testing.assert_allclose( + agg_string.overall_aggregated_framework.thetas, + agg_manual.overall_aggregated_framework.thetas, + rtol=1e-9, + atol=1e-12, + ) + + # Compare overall aggregated framework - standard errors + np.testing.assert_allclose( + agg_string.overall_aggregated_framework.ses, + agg_manual.overall_aggregated_framework.ses, + rtol=1e-9, + atol=1e-12, + ) + + # Compare aggregation weights + np.testing.assert_allclose( + agg_string.aggregation_weights, + agg_manual.aggregation_weights, + rtol=1e-9, + atol=1e-12, + ) + + # Compare overall aggregation weights + np.testing.assert_allclose( + agg_string.overall_aggregation_weights, + agg_manual.overall_aggregation_weights, + rtol=1e-9, + atol=1e-12, + ) + + # Compare aggregation names + assert agg_string.aggregation_names == agg_manual.aggregation_names + + # Compare number of aggregations + assert agg_string.n_aggregations == agg_manual.n_aggregations + + +@pytest.mark.ci +def test_manual_weights_properties(dml_fitted_obj, aggregation_method): + """Test that manual weights have the expected properties.""" + + manual_weights_dict = _extract_manual_weights(dml_fitted_obj, aggregation_method) + + # Check that required keys are present + assert "weight_masks" in manual_weights_dict + assert "agg_names" in manual_weights_dict + assert "agg_weights" in manual_weights_dict + + weight_masks = manual_weights_dict["weight_masks"] + agg_weights = manual_weights_dict["agg_weights"] + + # Check weight masks properties + assert isinstance(weight_masks, np.ma.MaskedArray) + assert weight_masks.ndim == 4 + assert weight_masks.shape[:-1] == dml_fitted_obj.gt_index.shape + + # Check that aggregation weights sum to 1 + assert math.isclose(np.sum(agg_weights), 1.0, rel_tol=1e-9, abs_tol=1e-12) + + # Check that individual weight masks sum to 1 (for non-masked elements) 
+ n_aggregations = weight_masks.shape[-1] + for i in range(n_aggregations): + weights = weight_masks[..., i].compressed() + if len(weights) > 0: + assert math.isclose(np.sum(weights), 1.0, rel_tol=1e-9, abs_tol=1e-12) + + # Check that weight masks have the same mask as gt_index + for i in range(n_aggregations): + np.testing.assert_array_equal(weight_masks[..., i].mask, dml_fitted_obj.gt_index.mask) diff --git a/doubleml/did/tests/test_did_multi_aggregation_weight_index.py b/doubleml/did/tests/test_did_multi_aggregation_weight_index.py deleted file mode 100644 index d001a4a8..00000000 --- a/doubleml/did/tests/test_did_multi_aggregation_weight_index.py +++ /dev/null @@ -1 +0,0 @@ -# TODO: For each aggregation method check if the aggregated weights correspond to certain gt_combinations (group, time etc.) From f27bf2068ec30a64c1b8b37af4628172eb05b3d5 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Wed, 11 Jun 2025 14:55:49 +0200 Subject: [PATCH 52/84] some progress on refactoring the data backends. --- doubleml/data/tests/test_cluster_data.py | 53 ++++++++++-------------- 1 file changed, 21 insertions(+), 32 deletions(-) diff --git a/doubleml/data/tests/test_cluster_data.py b/doubleml/data/tests/test_cluster_data.py index 4489e528..9de9294c 100644 --- a/doubleml/data/tests/test_cluster_data.py +++ b/doubleml/data/tests/test_cluster_data.py @@ -2,7 +2,7 @@ import pandas as pd import pytest -from doubleml import DoubleMLData +from doubleml import DoubleMLData, DoubleMLDIDData, DoubleMLSSMData from doubleml.plm.datasets import make_pliv_multiway_cluster_CKMS2021, make_plr_CCDDHNR2018 @@ -11,29 +11,29 @@ def test_obj_vs_from_arrays(): np.random.seed(3141) dml_data = make_pliv_multiway_cluster_CKMS2021(N=10, M=10) dml_data_from_array = DoubleMLData.from_arrays( - dml_data.data[dml_data.x_cols], - dml_data.data[dml_data.y_col], - dml_data.data[dml_data.d_cols], - dml_data.data[dml_data.cluster_cols], - dml_data.data[dml_data.z_cols], + x=dml_data.data[dml_data.x_cols], + y=dml_data.data[dml_data.y_col], + d=dml_data.data[dml_data.d_cols], + cluster_vars=dml_data.data[dml_data.cluster_cols], + z=dml_data.data[dml_data.z_cols], ) df = dml_data.data.copy() df.rename( columns={"cluster_var_i": "cluster_var1", "cluster_var_j": "cluster_var2", "Y": "y", "D": "d", "Z": "z"}, inplace=True ) - assert dml_data_from_array.data.equals(df) + assert dml_data_from_array.data[list(df.columns)].equals(df) # with a single cluster variable dml_data_from_array = DoubleMLData.from_arrays( - dml_data.data[dml_data.x_cols], - dml_data.data[dml_data.y_col], - dml_data.data[dml_data.d_cols], - dml_data.data[dml_data.cluster_cols[1]], - dml_data.data[dml_data.z_cols], + x=dml_data.data[dml_data.x_cols], + y=dml_data.data[dml_data.y_col], + d=dml_data.data[dml_data.d_cols], + cluster_vars=dml_data.data[dml_data.cluster_cols[1]], + z=dml_data.data[dml_data.z_cols], ) df = dml_data.data.copy().drop(columns="cluster_var_i") df.rename(columns={"cluster_var_j": "cluster_var", "Y": "y", "D": "d", "Z": "z"}, inplace=True) - assert dml_data_from_array.data.equals(df) + assert dml_data_from_array.data[list(df.columns)].equals(df) @pytest.mark.ci @@ -53,32 +53,22 @@ def test_x_cols_setter_defaults_w_cluster(): # without instrument and with time df = pd.DataFrame(np.tile(np.arange(6), (6, 1)), columns=["yy", "dd", "xx1", "xx2", "tt", "cluster1"]) - dml_data = DoubleMLData(df, y_col="yy", d_cols="dd", cluster_cols="cluster1", t_col="tt") + dml_data = DoubleMLDIDData(df, y_col="yy", d_cols="dd", cluster_cols="cluster1", 
t_col="tt") assert dml_data.x_cols == ["xx1", "xx2"] # with instrument and with time df = pd.DataFrame(np.tile(np.arange(7), (6, 1)), columns=["yy", "dd", "xx1", "xx2", "zz", "tt", "cluster1"]) - dml_data = DoubleMLData(df, y_col="yy", d_cols="dd", cluster_cols="cluster1", z_cols="zz", t_col="tt") + dml_data = DoubleMLDIDData(df, y_col="yy", d_cols="dd", cluster_cols="cluster1", z_cols="zz", t_col="tt") assert dml_data.x_cols == ["xx1", "xx2"] # without instrument and with selection df = pd.DataFrame(np.tile(np.arange(6), (6, 1)), columns=["yy", "dd", "xx1", "xx2", "ss", "cluster1"]) - dml_data = DoubleMLData(df, y_col="yy", d_cols="dd", cluster_cols="cluster1", s_col="ss") + dml_data = DoubleMLSSMData(df, y_col="yy", d_cols="dd", cluster_cols="cluster1", s_col="ss") assert dml_data.x_cols == ["xx1", "xx2"] # with instrument and with selection df = pd.DataFrame(np.tile(np.arange(7), (6, 1)), columns=["yy", "dd", "xx1", "xx2", "zz", "ss", "cluster1"]) - dml_data = DoubleMLData(df, y_col="yy", d_cols="dd", cluster_cols="cluster1", z_cols="zz", s_col="ss") - assert dml_data.x_cols == ["xx1", "xx2"] - - # without instrument with time with selection - df = pd.DataFrame(np.tile(np.arange(7), (6, 1)), columns=["yy", "dd", "xx1", "xx2", "tt", "ss", "cluster1"]) - dml_data = DoubleMLData(df, y_col="yy", d_cols="dd", cluster_cols="cluster1", t_col="tt", s_col="ss") - assert dml_data.x_cols == ["xx1", "xx2"] - - # with instrument with time with selection - df = pd.DataFrame(np.tile(np.arange(8), (6, 1)), columns=["yy", "dd", "xx1", "xx2", "zz", "tt", "ss", "cluster1"]) - dml_data = DoubleMLData(df, y_col="yy", d_cols="dd", cluster_cols="cluster1", z_cols="zz", t_col="tt", s_col="ss") + dml_data = DoubleMLSSMData(df, y_col="yy", d_cols="dd", cluster_cols="cluster1", z_cols="zz", s_col="ss") assert dml_data.x_cols == ["xx1", "xx2"] @@ -107,7 +97,7 @@ def test_cluster_cols_setter(): with pytest.raises(ValueError, match=msg): dml_data.cluster_cols = "X13" - msg = r"The cluster variable\(s\) cluster_cols must be of str or list type. " "5 of type was passed." + msg = r"The cluster variable\(s\) cluster_cols must be of str or list type (or None). " "5 of type was passed." with pytest.raises(TypeError, match=msg): dml_data.cluster_cols = 5 @@ -154,14 +144,14 @@ def test_disjoint_sets(): r"and cluster variable\(s\) \(``cluster_cols``\)." ) with pytest.raises(ValueError, match=msg): - _ = DoubleMLData(df, y_col="yy", d_cols=["dd1"], x_cols=["xx1"], t_col="xx2", cluster_cols="xx2") + _ = DoubleMLDIDData(df, y_col="yy", d_cols=["dd1"], x_cols=["xx1"], t_col="xx2", cluster_cols="xx2") msg = ( r"At least one variable/column is set as score or selection variable \(``s_col``\) " r"and cluster variable\(s\) \(``cluster_cols``\)." 
) with pytest.raises(ValueError, match=msg): - _ = DoubleMLData(df, y_col="yy", d_cols=["dd1"], x_cols=["xx1"], s_col="xx2", cluster_cols="xx2") + _ = DoubleMLSSMData(df, y_col="yy", d_cols=["dd1"], x_cols=["xx1"], s_col="xx2", cluster_cols="xx2") @pytest.mark.ci @@ -215,14 +205,13 @@ def test_cluster_data_str(): df["time_var"] = 1 df["score_var"] = 0.5 - dml_data_with_optional = DoubleMLData( + dml_data_with_optional = DoubleMLDIDData( data=df, y_col="Y", d_cols="D", cluster_cols=["cluster_var_i", "cluster_var_j"], z_cols="Z", t_col="time_var", - s_col="score_var", ) dml_str_optional = str(dml_data_with_optional) From 9e3e6d62a00a9cf3b3394476cd7fbf71e30ea31f Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Thu, 12 Jun 2025 09:25:55 +0200 Subject: [PATCH 53/84] update did cs multi test for cs data --- doubleml/did/tests/test_did_multi_vs_cs_binary.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/doubleml/did/tests/test_did_multi_vs_cs_binary.py b/doubleml/did/tests/test_did_multi_vs_cs_binary.py index 59886854..7af8d74d 100644 --- a/doubleml/did/tests/test_did_multi_vs_cs_binary.py +++ b/doubleml/did/tests/test_did_multi_vs_cs_binary.py @@ -6,7 +6,7 @@ from sklearn.linear_model import LinearRegression, LogisticRegression import doubleml as dml -from doubleml.did.datasets import make_did_CS2021 +from doubleml.did.datasets import make_did_cs_CS2021 from doubleml.utils import DMLDummyClassifier, DMLDummyRegressor @@ -44,15 +44,20 @@ def time_type(request): return request.param +@pytest.fixture(scope="module", params=[0.5, 0.1]) +def lambda_t(request): + return request.param + + @pytest.fixture(scope="module") -def dml_did_binary_vs_did_multi_fixture(time_type, learner, score, in_sample_normalization, trimming_threshold): +def dml_did_binary_vs_did_multi_fixture(time_type, lambda_t, learner, score, in_sample_normalization, trimming_threshold): n_obs = 500 dpg = 1 boot_methods = ["normal"] n_rep_boot = 500 # collect data - df = make_did_CS2021(n_obs=n_obs, dgp_type=dpg, time_type=time_type) + df = make_did_cs_CS2021(n_obs=n_obs, dgp_type=dpg, time_type=time_type, lambda_t=lambda_t) dml_panel_data = dml.data.DoubleMLPanelData( df, y_col="y", d_cols="d", id_col="id", t_col="t", x_cols=["Z1", "Z2", "Z3", "Z4"] ) From 5c4d1e25a2c0e9560e6af3f01ac287e933367f81 Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Thu, 12 Jun 2025 12:57:17 +0200 Subject: [PATCH 54/84] update did binary to work with unbalanced panels --- doubleml/did/did_binary.py | 7 +- ..._binary_external_predictions_unbalanced.py | 93 +++++++++++++++++++ 2 files changed, 97 insertions(+), 3 deletions(-) create mode 100644 doubleml/did/tests/test_did_binary_external_predictions_unbalanced.py diff --git a/doubleml/did/did_binary.py b/doubleml/did/did_binary.py index a9939c97..6fa19e0d 100644 --- a/doubleml/did/did_binary.py +++ b/doubleml/did/did_binary.py @@ -421,9 +421,10 @@ def _preprocess_data(self, g_value, pre_t, eval_t): id_col = self._dml_data.id_col g_col = self._dml_data.g_col - # relevent data subset - data_subset_indicator = data[t_col].isin([pre_t, eval_t]) - data_subset = data[data_subset_indicator].sort_values(by=[id_col, t_col]) + # relevent data subset: Only include units which are observed in both periods + relevant_time_data = data[data[t_col].isin([pre_t, eval_t])] + ids_with_both_periods_filter = relevant_time_data.groupby(id_col)[t_col].transform("nunique") == 2 + data_subset = relevant_time_data[ids_with_both_periods_filter].sort_values(by=[id_col, t_col]) # Construct G (treatment 
group) indicating treatment period in g G_indicator = (data_subset[g_col] == g_value).astype(int) diff --git a/doubleml/did/tests/test_did_binary_external_predictions_unbalanced.py b/doubleml/did/tests/test_did_binary_external_predictions_unbalanced.py new file mode 100644 index 00000000..ffeadb51 --- /dev/null +++ b/doubleml/did/tests/test_did_binary_external_predictions_unbalanced.py @@ -0,0 +1,93 @@ +import math + +import numpy as np +import pytest +from sklearn.linear_model import LinearRegression, LogisticRegression + +from doubleml.data import DoubleMLPanelData +from doubleml.did import DoubleMLDIDBinary +from doubleml.did.datasets import make_did_cs_CS2021 +from doubleml.tests._utils import draw_smpls +from doubleml.utils import DMLDummyClassifier, DMLDummyRegressor + + +@pytest.fixture(scope="module", params=["observational", "experimental"]) +def did_score(request): + return request.param + + +@pytest.fixture(scope="module", params=[1, 3]) +def n_rep(request): + return request.param + + +@pytest.fixture(scope="module") +def doubleml_did_panel_fixture(did_score, n_rep): + n_obs = 500 + n_folds = 5 + dgp = 1 + + ext_predictions = {"d": {}} + df = make_did_cs_CS2021(n_obs=n_obs, dgp_type=dgp, time_type="float") + dml_panel_data = DoubleMLPanelData(df, y_col="y", d_cols="d", id_col="id", t_col="t", x_cols=["Z1", "Z2", "Z3", "Z4"]) + + kwargs = { + "obj_dml_data": dml_panel_data, + "g_value": 2, + "t_value_pre": 0, + "t_value_eval": 1, + "score": did_score, + "n_rep": n_rep, + "draw_sample_splitting": False, + } + + dml_did = DoubleMLDIDBinary(ml_g=LinearRegression(), ml_m=LogisticRegression(), **kwargs) + all_smpls = draw_smpls(n_obs=dml_did.n_obs_subset, n_folds=n_folds, n_rep=n_rep, groups=dml_did._g_panel) + dml_did.set_sample_splitting(all_smpls) + + np.random.seed(3141) + dml_did.fit(store_predictions=True) + + all_keys = ["ml_g0", "ml_g1"] + for key in all_keys: + ext_predictions["d"][key] = dml_did.predictions[key][:, :, 0] + if did_score == "observational": + ext_predictions["d"]["ml_m"] = dml_did.predictions["ml_m"][:, :, 0] + dml_did_ext = DoubleMLDIDBinary(ml_g=DMLDummyRegressor(), ml_m=DMLDummyClassifier(), **kwargs) + dml_did_ext.set_sample_splitting(all_smpls) + np.random.seed(3141) + dml_did_ext.fit(external_predictions=ext_predictions) + + res_dict = { + "coef": dml_did.coef[0], + "coef_ext": dml_did_ext.coef[0], + "se": dml_did.se[0], + "se_ext": dml_did_ext.se[0], + "score": dml_did.psi, + "score_ext": dml_did_ext.psi, + "dml_did_nuisance_loss": dml_did.nuisance_loss, + "dml_did_ext_nuisance_loss": dml_did_ext.nuisance_loss, + } + + return res_dict + + +@pytest.mark.ci +def test_panel_coef(doubleml_did_panel_fixture): + assert math.isclose(doubleml_did_panel_fixture["coef"], doubleml_did_panel_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-3) + + +@pytest.mark.ci +def test_panel_se(doubleml_did_panel_fixture): + assert math.isclose(doubleml_did_panel_fixture["se"], doubleml_did_panel_fixture["se_ext"], rel_tol=1e-9, abs_tol=1e-3) + + +@pytest.mark.ci +def test_panel_score(doubleml_did_panel_fixture): + assert np.allclose(doubleml_did_panel_fixture["score"], doubleml_did_panel_fixture["score_ext"], rtol=1e-9, atol=1e-3) + + +@pytest.mark.ci +def test_panel_nuisance_loss(doubleml_did_panel_fixture): + for key, value in doubleml_did_panel_fixture["dml_did_nuisance_loss"].items(): + assert np.allclose(value, doubleml_did_panel_fixture["dml_did_ext_nuisance_loss"][key], rtol=1e-9, atol=1e-3) From 8437d79f0edf50f69bc2de690762c951c11a0cad Mon Sep 17 00:00:00 2001 From: Jan 
Teichert-Kluge Date: Thu, 12 Jun 2025 14:55:52 +0200 Subject: [PATCH 55/84] formatting issue --- doubleml/data/base_data.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/doubleml/data/base_data.py b/doubleml/data/base_data.py index 9ba8bc00..2297944e 100644 --- a/doubleml/data/base_data.py +++ b/doubleml/data/base_data.py @@ -11,10 +11,7 @@ class DoubleMLBaseData(ABC): - """Bas x_cols = [f"X{i + 1}" for i in np.arange(x.shape[1])] - # baseline version with features, outcome and treatments - data = pd.DataFrame(np.column_stack((x, y, d)), columns=x_cols + [y_col] + d_cols)Class Double machine learning data-backends - """ + """Base Class Double machine learning data-backends""" def __init__(self, data): if not isinstance(data, pd.DataFrame): From e58f55038ca173293dc2a6e0d41b6d8f2ecadb1b Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Thu, 12 Jun 2025 15:06:07 +0200 Subject: [PATCH 56/84] updt. unit tests --- doubleml/data/tests/test_cluster_data.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doubleml/data/tests/test_cluster_data.py b/doubleml/data/tests/test_cluster_data.py index 9de9294c..91627158 100644 --- a/doubleml/data/tests/test_cluster_data.py +++ b/doubleml/data/tests/test_cluster_data.py @@ -140,15 +140,15 @@ def test_disjoint_sets(): _ = DoubleMLData(df, y_col="yy", d_cols=["dd1"], x_cols=["xx1"], z_cols=["xx2"], cluster_cols="xx2") msg = ( - r"At least one variable/column is set as time variable \(``t_col``\) " - r"and cluster variable\(s\) \(``cluster_cols``\)." + r"At least one variable/column is set as cluster variable\(s\) \(``cluster_cols``\) " + r"and time variable \(``t_col``\)." ) with pytest.raises(ValueError, match=msg): _ = DoubleMLDIDData(df, y_col="yy", d_cols=["dd1"], x_cols=["xx1"], t_col="xx2", cluster_cols="xx2") msg = ( - r"At least one variable/column is set as score or selection variable \(``s_col``\) " - r"and cluster variable\(s\) \(``cluster_cols``\)." + r"At least one variable/column is set as cluster variable\(s\) \(``cluster_cols``\) " + r"and selection variable \(``s_col``\)." 
) with pytest.raises(ValueError, match=msg): _ = DoubleMLSSMData(df, y_col="yy", d_cols=["dd1"], x_cols=["xx1"], s_col="xx2", cluster_cols="xx2") From a2deba93923426341520611aaea0f2158819f10a Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Thu, 12 Jun 2025 15:17:41 +0200 Subject: [PATCH 57/84] fix cluster DGP to use correct data backend --- .../datasets/dgp_pliv_multiway_cluster_CKMS2021.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/doubleml/plm/datasets/dgp_pliv_multiway_cluster_CKMS2021.py b/doubleml/plm/datasets/dgp_pliv_multiway_cluster_CKMS2021.py index df2b4cbe..39ff6a26 100644 --- a/doubleml/plm/datasets/dgp_pliv_multiway_cluster_CKMS2021.py +++ b/doubleml/plm/datasets/dgp_pliv_multiway_cluster_CKMS2021.py @@ -2,11 +2,11 @@ import pandas as pd from scipy.linalg import toeplitz -from doubleml.data import DoubleMLClusterData -from doubleml.utils._aliases import _array_alias, _data_frame_alias, _dml_cluster_data_alias +from doubleml.data import DoubleMLData +from doubleml.utils._aliases import _array_alias, _data_frame_alias, _dml_data_alias -def make_pliv_multiway_cluster_CKMS2021(N=25, M=25, dim_X=100, theta=1.0, return_type="DoubleMLClusterData", **kwargs): +def make_pliv_multiway_cluster_CKMS2021(N=25, M=25, dim_X=100, theta=1.0, return_type="DoubleMLData", **kwargs): """ Generates data from a partially linear IV regression model with multiway cluster sample used in Chiang et al. (2021). The data generating process is defined as @@ -188,12 +188,14 @@ def make_pliv_multiway_cluster_CKMS2021(N=25, M=25, dim_X=100, theta=1.0, return if return_type in _array_alias: return x, y, d, cluster_vars.values, z - elif return_type in _data_frame_alias + _dml_cluster_data_alias: + elif return_type in _data_frame_alias + _dml_data_alias: x_cols = [f"X{i + 1}" for i in np.arange(dim_X)] data = pd.concat((cluster_vars, pd.DataFrame(np.column_stack((x, y, d, z)), columns=x_cols + ["Y", "D", "Z"])), axis=1) if return_type in _data_frame_alias: return data else: - return DoubleMLClusterData(data, "Y", "D", cluster_cols, x_cols, "Z") + return DoubleMLData( + data, y_col="Y", d_cols="D", cluster_cols=cluster_cols, x_cols=x_cols, z_cols="Z", is_cluster_data=True + ) else: raise ValueError("Invalid return_type.") From cb1168484015670f07bdba59344e716964d7e995 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Thu, 12 Jun 2025 15:17:47 +0200 Subject: [PATCH 58/84] update unit tests --- doubleml/data/tests/test_cluster_data.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/doubleml/data/tests/test_cluster_data.py b/doubleml/data/tests/test_cluster_data.py index 91627158..a2cd726f 100644 --- a/doubleml/data/tests/test_cluster_data.py +++ b/doubleml/data/tests/test_cluster_data.py @@ -97,7 +97,7 @@ def test_cluster_cols_setter(): with pytest.raises(ValueError, match=msg): dml_data.cluster_cols = "X13" - msg = r"The cluster variable\(s\) cluster_cols must be of str or list type (or None). " "5 of type was passed." + msg = r"The cluster variable\(s\) cluster_cols must be of str or list type \(or None\)\. " "5 of type was passed." with pytest.raises(TypeError, match=msg): dml_data.cluster_cols = 5 @@ -161,7 +161,7 @@ def test_duplicates(): msg = r"Invalid cluster variable\(s\) cluster_cols: Contains duplicate values."
with pytest.raises(ValueError, match=msg): - _ = DoubleMLData(dml_cluster_data.data, y_col="y", d_cols=["d"], cluster_cols=["X3", "X2", "X3"]) + _ = DoubleMLData(dml_cluster_data.data, y_col="Y", d_cols=["D"], cluster_cols=["X3", "X2", "X3"], is_cluster_data=True) with pytest.raises(ValueError, match=msg): dml_cluster_data.cluster_cols = ["X3", "X2", "X3"] @@ -215,5 +215,4 @@ def test_cluster_data_str(): ) dml_str_optional = str(dml_data_with_optional) - assert "Time variable: time_var" in dml_str_optional - assert "Score/Selection variable: score_var" in dml_str_optional + assert "Time variable: time_var" in dml_str_optional \ No newline at end of file From 3fe83ff6683cd6d609cb841dda453ce24a7a2d5c Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Thu, 12 Jun 2025 15:21:07 +0200 Subject: [PATCH 59/84] align subset naming in did binary and cs version --- doubleml/did/did_binary.py | 32 +++++++++---------- doubleml/did/did_cs_binary.py | 26 +++++++-------- .../test_did_binary_external_predictions.py | 4 +-- ..._binary_external_predictions_unbalanced.py | 2 +- .../did/tests/test_did_binary_vs_did_panel.py | 2 +- ...test_did_cs_binary_external_predictions.py | 2 +- doubleml/did/tests/test_return_types.py | 12 +++---- 7 files changed, 40 insertions(+), 40 deletions(-) diff --git a/doubleml/did/did_binary.py b/doubleml/did/did_binary.py index 6fa19e0d..99e18e28 100644 --- a/doubleml/did/did_binary.py +++ b/doubleml/did/did_binary.py @@ -163,10 +163,10 @@ def __init__( # Preprocess data # Y1, Y0 might be needed if we want to support custom estimators and scores; currently only output y_diff - self._panel_data_wide = self._preprocess_data(self._g_value, self._t_value_pre, self._t_value_eval) + self._data_subset = self._preprocess_data(self._g_value, self._t_value_pre, self._t_value_eval) # Handling id values to match pairwise evaluation & simultaneous inference - id_panel_data = self._panel_data_wide[self._dml_data.id_col].values + id_panel_data = self._data_subset[self._dml_data.id_col].values id_original = self._dml_data.id_var_unique if not np.all(np.isin(id_panel_data, id_original)): raise ValueError("The id values in the panel data are not a subset of the original id values.") @@ -177,13 +177,13 @@ def __init__( # Numeric values for positions of the entries in id_panel_data inside id_original # np.nonzero(np.isin(id_original, id_panel_data)) - self._n_obs_subset = self._panel_data_wide.shape[0] # Effective sample size used for resampling - self._n_treated_subset = self._panel_data_wide["G_indicator"].sum() + self._n_obs_subset = self._data_subset.shape[0] # Effective sample size used for resampling + self._n_treated_subset = self._data_subset["G_indicator"].sum() # Save x and y for later ML estimation - self._x_panel = self._panel_data_wide.loc[:, self._dml_data.x_cols].values - self._y_panel = self._panel_data_wide.loc[:, "y_diff"].values - self._g_panel = self._panel_data_wide.loc[:, "G_indicator"].values + self._x_data_subset = self._data_subset.loc[:, self._dml_data.x_cols].values + self._y_data_subset = self._data_subset.loc[:, "y_diff"].values + self._g_data_subset = self._data_subset.loc[:, "G_indicator"].values valid_scores = ["observational", "experimental"] _check_score(self.score, valid_scores, allow_callable=False) @@ -196,7 +196,7 @@ def __init__( ) # set stratication for resampling - self._strata = self._panel_data_wide["G_indicator"] + self._strata = self._data_subset["G_indicator"] self._n_obs_sample_splitting = self.n_obs_subset if draw_sample_splitting: 
self.draw_sample_splitting() @@ -342,11 +342,11 @@ def anticipation_periods(self): return self._anticipation_periods @property - def panel_data_wide(self): + def data_subset(self): """ The preprocessed panel data in wide format. """ - return self._panel_data_wide + return self._data_subset @property def id_positions(self): @@ -470,8 +470,8 @@ def _preprocess_data(self, g_value, pre_t, eval_t): def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False): # Here: d is a binary treatment indicator - x, y = check_X_y(self._x_panel, self._y_panel, force_all_finite=False) - x, d = check_X_y(x, self._g_panel, force_all_finite=False) + x, y = check_X_y(self._x_data_subset, self._y_data_subset, force_all_finite=False) + x, d = check_X_y(x, self._g_data_subset, force_all_finite=False) # nuisance g # get train indices for d == 0 smpls_d0, smpls_d1 = _get_cond_smpls(smpls, d) @@ -611,8 +611,8 @@ def _score_elements(self, y, d, g_hat0, g_hat1, m_hat, p_hat): def _nuisance_tuning( self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search ): - x, y = check_X_y(self._x_panel, self._y_panel, force_all_finite=False) - x, d = check_X_y(x, self._g_panel, force_all_finite=False) + x, y = check_X_y(self._x_data_subset, self._y_data_subset, force_all_finite=False) + x, d = check_X_y(x, self._g_data_subset, force_all_finite=False) # get train indices for d == 0 and d == 1 smpls_d0, smpls_d1 = _get_cond_smpls(smpls, d) @@ -676,8 +676,8 @@ def _nuisance_tuning( return res def _sensitivity_element_est(self, preds): - y = self._y_panel - d = self._g_panel + y = self._y_data_subset + d = self._g_data_subset m_hat = _get_id_positions(preds["predictions"]["ml_m"], self.id_positions) g_hat0 = _get_id_positions(preds["predictions"]["ml_g0"], self.id_positions) diff --git a/doubleml/did/did_cs_binary.py b/doubleml/did/did_cs_binary.py index 7788f4b3..9e5ee6c2 100644 --- a/doubleml/did/did_cs_binary.py +++ b/doubleml/did/did_cs_binary.py @@ -97,10 +97,10 @@ def __init__( self._n_obs_subset = self.data_subset.shape[0] # Effective sample size used for resampling # Save x and y for later ML estimation - self._x_data = self.data_subset.loc[:, self._dml_data.x_cols].values - self._y_data = self.data_subset.loc[:, self._dml_data.y_col].values - self._g_data = self.data_subset.loc[:, "G_indicator"].values - self._t_data = self.data_subset.loc[:, "t_indicator"].values + self._x_data_subset = self.data_subset.loc[:, self._dml_data.x_cols].values + self._y_data_subset = self.data_subset.loc[:, self._dml_data.y_col].values + self._g_data_subset = self.data_subset.loc[:, "G_indicator"].values + self._t_data_subset = self.data_subset.loc[:, "t_indicator"].values valid_scores = ["observational", "experimental"] _check_score(self.score, valid_scores, allow_callable=False) @@ -402,9 +402,9 @@ def _estimate_conditional_g( def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False): # Here: d is a binary treatment indicator - x, y = check_X_y(X=self._x_data, y=self._y_data, force_all_finite=False) - _, d = check_X_y(x, self._g_data, force_all_finite=False) # (d is the G_indicator) - _, t = check_X_y(x, self._t_data, force_all_finite=False) + x, y = check_X_y(X=self._x_data_subset, y=self._y_data_subset, force_all_finite=False) + _, d = check_X_y(x, self._g_data_subset, force_all_finite=False) # (d is the G_indicator) + _, t = check_X_y(x, self._t_data_subset, force_all_finite=False) # THIS DIFFERS FROM THE PAPER due to stratified splitting this 
should be the same for each fold # nuisance estimates of the uncond. treatment prob. @@ -588,9 +588,9 @@ def _score_elements(self, y, d, t, g_hat_d0_t0, g_hat_d0_t1, g_hat_d1_t0, g_hat_ def _nuisance_tuning( self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search ): - x, y = check_X_y(X=self._x_data, y=self._y_data, force_all_finite=False) - _, d = check_X_y(x, self._g_data, force_all_finite=False) # (d is the G_indicator) - _, t = check_X_y(x, self._t_data, force_all_finite=False) + x, y = check_X_y(X=self._x_data_subset, y=self._y_data_subset, force_all_finite=False) + _, d = check_X_y(x, self._g_data_subset, force_all_finite=False) # (d is the G_indicator) + _, t = check_X_y(x, self._t_data_subset, force_all_finite=False) if scoring_methods is None: scoring_methods = {"ml_g": None, "ml_m": None} @@ -702,9 +702,9 @@ def _nuisance_tuning( return res def _sensitivity_element_est(self, preds): - y = self._y_data - d = self._g_data - t = self._t_data + y = self._y_data_subset + d = self._g_data_subset + t = self._t_data_subset m_hat = _get_id_positions(preds["predictions"]["ml_m"], self.id_positions) g_hat_d0_t0 = _get_id_positions(preds["predictions"]["ml_g_d0_t0"], self.id_positions) diff --git a/doubleml/did/tests/test_did_binary_external_predictions.py b/doubleml/did/tests/test_did_binary_external_predictions.py index 0cb3e055..0a6cf2f0 100644 --- a/doubleml/did/tests/test_did_binary_external_predictions.py +++ b/doubleml/did/tests/test_did_binary_external_predictions.py @@ -40,7 +40,7 @@ def doubleml_did_fixture(did_score, n_rep): } dml_did = DoubleMLDIDBinary(ml_g=LinearRegression(), ml_m=LogisticRegression(), **kwargs) - all_smpls = draw_smpls(n_obs, n_folds, n_rep=n_rep, groups=dml_did._g_panel) + all_smpls = draw_smpls(n_obs, n_folds, n_rep=n_rep, groups=dml_did._g_data_subset) dml_did.set_sample_splitting(all_smpls) np.random.seed(3141) @@ -112,7 +112,7 @@ def doubleml_did_panel_fixture(did_score, n_rep): } dml_did = DoubleMLDIDBinary(ml_g=LinearRegression(), ml_m=LogisticRegression(), **kwargs) - all_smpls = draw_smpls(n_obs=dml_did.n_obs_subset, n_folds=n_folds, n_rep=n_rep, groups=dml_did._g_panel) + all_smpls = draw_smpls(n_obs=dml_did.n_obs_subset, n_folds=n_folds, n_rep=n_rep, groups=dml_did._g_data_subset) dml_did.set_sample_splitting(all_smpls) np.random.seed(3141) diff --git a/doubleml/did/tests/test_did_binary_external_predictions_unbalanced.py b/doubleml/did/tests/test_did_binary_external_predictions_unbalanced.py index ffeadb51..a921efee 100644 --- a/doubleml/did/tests/test_did_binary_external_predictions_unbalanced.py +++ b/doubleml/did/tests/test_did_binary_external_predictions_unbalanced.py @@ -42,7 +42,7 @@ def doubleml_did_panel_fixture(did_score, n_rep): } dml_did = DoubleMLDIDBinary(ml_g=LinearRegression(), ml_m=LogisticRegression(), **kwargs) - all_smpls = draw_smpls(n_obs=dml_did.n_obs_subset, n_folds=n_folds, n_rep=n_rep, groups=dml_did._g_panel) + all_smpls = draw_smpls(n_obs=dml_did.n_obs_subset, n_folds=n_folds, n_rep=n_rep, groups=dml_did._g_data_subset) dml_did.set_sample_splitting(all_smpls) np.random.seed(3141) diff --git a/doubleml/did/tests/test_did_binary_vs_did_panel.py b/doubleml/did/tests/test_did_binary_vs_did_panel.py index 9da81739..426b413c 100644 --- a/doubleml/did/tests/test_did_binary_vs_did_panel.py +++ b/doubleml/did/tests/test_did_binary_vs_did_panel.py @@ -78,7 +78,7 @@ def dml_did_binary_vs_did_fixture(time_type, learner, score, in_sample_normaliza ) dml_did_binary_obj.fit() - df_wide = 
dml_did_binary_obj._panel_data_wide.copy() + df_wide = dml_did_binary_obj.data_subset.copy() dml_data = dml.data.DoubleMLData(df_wide, y_col="y_diff", d_cols="G_indicator", x_cols=["Z1", "Z2", "Z3", "Z4"]) dml_did_obj = dml.DoubleMLDID( dml_data, diff --git a/doubleml/did/tests/test_did_cs_binary_external_predictions.py b/doubleml/did/tests/test_did_cs_binary_external_predictions.py index 477c6dc7..f6b77f0b 100644 --- a/doubleml/did/tests/test_did_cs_binary_external_predictions.py +++ b/doubleml/did/tests/test_did_cs_binary_external_predictions.py @@ -114,7 +114,7 @@ def doubleml_did_cs_panel_fixture(did_score, n_rep): } dml_did = DoubleMLDIDCSBinary(ml_g=LinearRegression(), ml_m=LogisticRegression(), **kwargs) - all_smpls = draw_smpls(n_obs=dml_did.n_obs_subset, n_folds=n_folds, n_rep=n_rep, groups=dml_did._g_data) + all_smpls = draw_smpls(n_obs=dml_did.n_obs_subset, n_folds=n_folds, n_rep=n_rep, groups=dml_did._g_data_subset) dml_did.set_sample_splitting(all_smpls) np.random.seed(3141) diff --git a/doubleml/did/tests/test_return_types.py b/doubleml/did/tests/test_return_types.py index 1b6fa736..683b1dc1 100644 --- a/doubleml/did/tests/test_return_types.py +++ b/doubleml/did/tests/test_return_types.py @@ -122,12 +122,12 @@ def test_panel_return_types(dml_obj, cls): assert isinstance(dml_obj.t_value_pre, (int, np.integer, float, np.floating)) assert isinstance(dml_obj.post_treatment, bool) - # Test panel_data_wide property - assert isinstance(dml_obj.panel_data_wide, pd.DataFrame) - assert dml_obj.panel_data_wide.shape[0] <= N_OBS - assert "G_indicator" in dml_obj.panel_data_wide.columns - assert "C_indicator" in dml_obj.panel_data_wide.columns - assert "y_diff" in dml_obj.panel_data_wide.columns + # Test data_subset property + assert isinstance(dml_obj.data_subset, pd.DataFrame) + assert dml_obj.data_subset.shape[0] <= N_OBS + assert "G_indicator" in dml_obj.data_subset.columns + assert "C_indicator" in dml_obj.data_subset.columns + assert "y_diff" in dml_obj.data_subset.columns # Test id_positions property assert isinstance(dml_obj.id_positions, np.ndarray) From 1eec50ced37a629425dcd66994839412d7d1f6d6 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Thu, 12 Jun 2025 16:17:50 +0200 Subject: [PATCH 60/84] fix panel data backend / unit tests --- doubleml/data/panel_data.py | 15 ++++++++++----- doubleml/data/tests/test_panel_data.py | 2 +- doubleml/data/tests/test_panel_data_exceptions.py | 2 +- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/doubleml/data/panel_data.py b/doubleml/data/panel_data.py index c1ec3bb5..a3651756 100644 --- a/doubleml/data/panel_data.py +++ b/doubleml/data/panel_data.py @@ -157,8 +157,9 @@ def datetime_unit(self): """ The unit of the time variable. """ - return self._datetime_unit @ property + return self._datetime_unit + @property def d(self): """ Array of treatment variable; @@ -228,9 +229,13 @@ def g_col(self): """ The treatment variable indicating the time of treatment exposure. 
""" - return self._d_cols[0] @ DoubleMLData.d_cols.setter + return self._d_cols[0] + + @ DoubleMLData.d_cols.setter def d_cols(self, value): + if isinstance(value, str): + value = [value] super(self.__class__, self.__class__).d_cols.__set__(self, value) if hasattr(self, "_g_values"): self._g_values = np.sort(np.unique(self.d)) # update unique values of g @@ -266,7 +271,7 @@ def t_col(self, value): ) # Check if data exists (during initialization it might not) if hasattr(self, "_data") and value not in self.all_variables: - raise ValueError("Invalid time variable t_col. The time variable is no data column.") + raise ValueError(f"Invalid time variable t_col. {value} is no data column.") self._t_col = value # Update time variable array if data is already loaded if hasattr(self, "_data"): @@ -301,8 +306,8 @@ def _check_disjoint_sets(self): self._check_disjoint_sets_t_col() def _check_disjoint_sets_id_col(self): - # apply the standard checks from the DoubleMLData class - super(DoubleMLPanelData, self)._check_disjoint_sets() + # The call to super()._check_disjoint_sets() is removed from here as it's redundant + # and called in the main _check_disjoint_sets method of this class. # special checks for the additional id variable (and the time variable) id_col_set = {self.id_col} diff --git a/doubleml/data/tests/test_panel_data.py b/doubleml/data/tests/test_panel_data.py index 2f2250ba..a9ea0ea2 100644 --- a/doubleml/data/tests/test_panel_data.py +++ b/doubleml/data/tests/test_panel_data.py @@ -33,7 +33,7 @@ def test_t_col_setter(): with pytest.raises(ValueError, match=msg): dml_data.t_col = "a13" - msg = r"The time variable t_col must be of str type \(or None\). " "5 of type was passed." + msg = r"The time variable t_col must be of str type. " "5 of type was passed." 
with pytest.raises(TypeError, match=msg): dml_data.t_col = 5 diff --git a/doubleml/data/tests/test_panel_data_exceptions.py b/doubleml/data/tests/test_panel_data_exceptions.py index fab648fe..7480bce1 100644 --- a/doubleml/data/tests/test_panel_data_exceptions.py +++ b/doubleml/data/tests/test_panel_data_exceptions.py @@ -109,5 +109,5 @@ def test_invalid_datetime_unit(sample_data): # test if no exception is raised @pytest.mark.ci def test_no_exception(sample_data): - DoubleMLPanelData(data=sample_data, y_col="y", d_cols="treatment", t_col="time", id_col="id") + DoubleMLPanelData(data=sample_data, y_col="y", d_cols=["treatment"], t_col="time", id_col="id") assert True From d71dff605fa55cbbda4e75a8cea0ab4298e7b3b7 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Thu, 12 Jun 2025 16:18:04 +0200 Subject: [PATCH 61/84] fix did data backend / unit tests --- doubleml/did/datasets/dgp_did_SZ2020.py | 8 ++++---- doubleml/did/tests/test_datasets.py | 6 +++--- doubleml/did/tests/test_did_cs_external_predictions.py | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/doubleml/did/datasets/dgp_did_SZ2020.py b/doubleml/did/datasets/dgp_did_SZ2020.py index db82b032..eb150bbf 100644 --- a/doubleml/did/datasets/dgp_did_SZ2020.py +++ b/doubleml/did/datasets/dgp_did_SZ2020.py @@ -189,7 +189,7 @@ def make_did_SZ2020(n_obs=500, dgp_type=1, cross_sectional_data=False, return_ty if return_type in _data_frame_alias: return data else: - return DoubleMLDIDData(data, "y", "d", x_cols=z_cols) + return DoubleMLDIDData(data, y_col="y", d_cols="d", x_cols=z_cols) elif return_type == "DoubleMLPanelData": z_cols = [f"Z{i + 1}" for i in np.arange(dim_x)] df0 = ( @@ -218,7 +218,7 @@ def make_did_SZ2020(n_obs=500, dgp_type=1, cross_sectional_data=False, return_ty ) df = pd.concat([df0, df1], axis=0) - return DoubleMLPanelData(df, "y", "d", t_col="t", id_col="id", x_cols=z_cols) + return DoubleMLPanelData(df, y_col="y", d_cols="d", t_col="t", id_col="id", x_cols=z_cols) else: raise ValueError("Invalid return_type.") @@ -235,6 +235,6 @@ def make_did_SZ2020(n_obs=500, dgp_type=1, cross_sectional_data=False, return_ty if return_type in _data_frame_alias: return data elif return_type in _dml_did_data_alias: - return DoubleMLDIDData(data, "y", "d", x_cols=z_cols, t_col="t") + return DoubleMLDIDData(data, y_col="y", d_cols="d", x_cols=z_cols, t_col="t") else: - raise ValueError("Invalid return_type.") \ No newline at end of file + raise ValueError("Invalid return_type.") diff --git a/doubleml/did/tests/test_datasets.py b/doubleml/did/tests/test_datasets.py index 0e323ec9..508769eb 100644 --- a/doubleml/did/tests/test_datasets.py +++ b/doubleml/did/tests/test_datasets.py @@ -2,7 +2,7 @@ import pandas as pd import pytest -from doubleml import DoubleMLData +from doubleml import DoubleMLDIDData from doubleml.did.datasets import make_did_CS2021, make_did_SZ2020 msg_inv_return_type = "Invalid return_type." 
@@ -21,8 +21,8 @@ def dgp_type(request): @pytest.mark.ci def test_make_did_SZ2020_return_types(cross_sectional, dgp_type): np.random.seed(3141) - res = make_did_SZ2020(n_obs=100, dgp_type=dgp_type, cross_sectional_data=cross_sectional, return_type=DoubleMLData) - assert isinstance(res, DoubleMLData) + res = make_did_SZ2020(n_obs=100, dgp_type=dgp_type, cross_sectional_data=cross_sectional, return_type=DoubleMLDIDData) + assert isinstance(res, DoubleMLDIDData) res = make_did_SZ2020(n_obs=100, dgp_type=dgp_type, cross_sectional_data=cross_sectional, return_type=pd.DataFrame) assert isinstance(res, pd.DataFrame) if cross_sectional: diff --git a/doubleml/did/tests/test_did_cs_external_predictions.py b/doubleml/did/tests/test_did_cs_external_predictions.py index 2b28ac8a..1c5f6640 100644 --- a/doubleml/did/tests/test_did_cs_external_predictions.py +++ b/doubleml/did/tests/test_did_cs_external_predictions.py @@ -24,7 +24,7 @@ def n_rep(request): @pytest.fixture(scope="module") def doubleml_didcs_fixture(did_score, n_rep): ext_predictions = {"d": {}} - dml_data = make_did_SZ2020(n_obs=500, cross_sectional_data=True, return_type="DoubleMLData") + dml_data = make_did_SZ2020(n_obs=500, cross_sectional_data=True, return_type="DoubleMLDIDData") all_smpls = draw_smpls(len(dml_data.y), 5, n_rep=n_rep, groups=dml_data.d) kwargs = {"obj_dml_data": dml_data, "score": did_score, "n_rep": n_rep, "n_folds": 5, "draw_sample_splitting": False} dml_did_cs = DoubleMLDIDCS(ml_g=LinearRegression(), ml_m=LogisticRegression(), **kwargs) From 74ef476768d50fc21650b21a30011374f1f43f3b Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Thu, 12 Jun 2025 16:35:51 +0200 Subject: [PATCH 62/84] add depr. warning with version --- doubleml/data/__init__.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/doubleml/data/__init__.py b/doubleml/data/__init__.py index 7d368b76..0462c763 100644 --- a/doubleml/data/__init__.py +++ b/doubleml/data/__init__.py @@ -2,7 +2,6 @@ The :mod:`doubleml.data` module implements data classes for double machine learning. """ -from .base_data import DoubleMLData import warnings from .base_data import DoubleMLData @@ -33,7 +32,8 @@ def __init__( force_all_x_finite=True, ): warnings.warn( - "DoubleMLClusterData is deprecated. " "Use DoubleMLData with is_cluster_data=True instead.", + "DoubleMLClusterData is deprecated and will be removed with version 0.12.0. " + "Use DoubleMLData with is_cluster_data=True instead.", FutureWarning, stacklevel=2, ) @@ -56,10 +56,12 @@ def from_arrays( ): """ Initialize :class:`DoubleMLClusterData` from :class:`numpy.ndarray`'s. - This method is deprecated, use DoubleMLData.from_arrays with is_cluster_data=True instead. + This method is deprecated and will be removed with version 0.12.0, + use DoubleMLData.from_arrays with is_cluster_data=True instead. """ warnings.warn( - "DoubleMLClusterData is deprecated. " "Use DoubleMLData.from_arrays with is_cluster_data=True instead.", + "DoubleMLClusterData is deprecated and will be removed with version 0.12.0. 
" + "Use DoubleMLData.from_arrays with is_cluster_data=True instead.", FutureWarning, stacklevel=2, ) From e7a9f5c75e0fd7fc5aff78998012052e7e993f51 Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Thu, 12 Jun 2025 16:39:36 +0200 Subject: [PATCH 63/84] update return type tests for did cs binary --- doubleml/did/did_cs_binary.py | 2 +- doubleml/did/tests/test_return_types.py | 58 +++++++++++++++++++++---- 2 files changed, 50 insertions(+), 10 deletions(-) diff --git a/doubleml/did/did_cs_binary.py b/doubleml/did/did_cs_binary.py index 9e5ee6c2..a6005d53 100644 --- a/doubleml/did/did_cs_binary.py +++ b/doubleml/did/did_cs_binary.py @@ -90,7 +90,7 @@ def __init__( # Find position of data subset in original data # These entries should be replaced by nuisance predictions, all others should be set to 0. - self._id_positions = self.data_subset.index + self._id_positions = self.data_subset.index.values # Numeric values for positions of the entries in id_panel_data inside id_original # np.nonzero(np.isin(id_original, id_panel_data)) diff --git a/doubleml/did/tests/test_return_types.py b/doubleml/did/tests/test_return_types.py index 683b1dc1..37105c3e 100644 --- a/doubleml/did/tests/test_return_types.py +++ b/doubleml/did/tests/test_return_types.py @@ -4,8 +4,8 @@ from sklearn.linear_model import Lasso, LogisticRegression from doubleml.data import DoubleMLData, DoubleMLPanelData -from doubleml.did import DoubleMLDID, DoubleMLDIDBinary, DoubleMLDIDCS -from doubleml.did.datasets import make_did_CS2021, make_did_SZ2020 +from doubleml.did import DoubleMLDID, DoubleMLDIDBinary, DoubleMLDIDCS, DoubleMLDIDCSBinary +from doubleml.did.datasets import make_did_CS2021, make_did_cs_CS2021, make_did_SZ2020 from doubleml.utils._check_return_types import ( check_basic_predictions_and_targets, check_basic_property_types_and_shapes, @@ -89,6 +89,17 @@ def test_sensitivity_return_types(fitted_dml_obj): df_panel, y_col="y_binary", d_cols="d", id_col="id", t_col="t", x_cols=["Z1", "Z2", "Z3", "Z4"] ) +# Create a dataset for DoubleMLDIDCSBinary +df_panel_cs = make_did_cs_CS2021(n_obs=N_OBS, dgp_type=1, n_pre_treat_periods=2, n_periods=N_PERIODS, time_type="float") +df_panel_cs["y_binary"] = np.random.binomial(n=1, p=0.5, size=df_panel_cs.shape[0]) +datasets["did_panel_cs"] = DoubleMLPanelData( + df_panel_cs, y_col="y", d_cols="d", id_col="id", t_col="t", x_cols=["Z1", "Z2", "Z3", "Z4"] +) +datasets["did_panel_cs_binary_outcome"] = DoubleMLPanelData( + df_panel_cs, y_col="y_binary", d_cols="d", id_col="id", t_col="t", x_cols=["Z1", "Z2", "Z3", "Z4"] +) + + dml_panel_binary_args = dml_args | { "g_value": 2, "t_value_pre": 0, @@ -106,6 +117,19 @@ def test_sensitivity_return_types(fitted_dml_obj): ), DoubleMLDIDBinary, ), + ( + DoubleMLDIDCSBinary(datasets["did_panel_cs"], ml_g=Lasso(), ml_m=LogisticRegression(), **dml_panel_binary_args), + DoubleMLDIDCSBinary, + ), + ( + DoubleMLDIDCSBinary( + datasets["did_panel_cs_binary_outcome"], + ml_g=LogisticRegression(), + ml_m=LogisticRegression(), + **dml_panel_binary_args, + ), + DoubleMLDIDCSBinary, + ), ] @@ -124,10 +148,14 @@ def test_panel_return_types(dml_obj, cls): # Test data_subset property assert isinstance(dml_obj.data_subset, pd.DataFrame) - assert dml_obj.data_subset.shape[0] <= N_OBS + if isinstance(dml_obj, DoubleMLDIDBinary): + assert dml_obj.data_subset.shape[0] <= N_OBS + assert "y_diff" in dml_obj.data_subset.columns + elif isinstance(dml_obj, DoubleMLDIDCSBinary): + assert dml_obj.data_subset.shape[0] <= N_OBS * 2 + assert "t_indicator" in 
dml_obj.data_subset.columns assert "G_indicator" in dml_obj.data_subset.columns assert "C_indicator" in dml_obj.data_subset.columns - assert "y_diff" in dml_obj.data_subset.columns # Test id_positions property assert isinstance(dml_obj.id_positions, np.ndarray) @@ -142,7 +170,10 @@ def test_panel_return_types(dml_obj, cls): # Test n_obs property assert isinstance(dml_obj.n_obs, (int, np.integer)) - assert dml_obj.n_obs <= N_OBS + if isinstance(dml_obj, DoubleMLDIDBinary): + assert dml_obj.n_obs <= N_OBS + elif isinstance(dml_obj, DoubleMLDIDCSBinary): + assert dml_obj.n_obs <= N_OBS * N_PERIODS # Test consistency between properties if dml_obj.post_treatment: @@ -161,20 +192,29 @@ def fitted_panel_dml_obj(request): @pytest.mark.ci def test_panel_property_types_and_shapes(fitted_panel_dml_obj): + # n_obs for psi, psi_a, psi_b checks within check_basic_property_types_and_shapes + # This should be the number of observations used for the score calculation. + # For DIDBinary, it's n_ids. For DIDCSBinary, it's _n_obs_subset. + # Both are consistently available as fitted_panel_dml_obj.n_obs. + actual_score_dim = (fitted_panel_dml_obj.n_obs, N_REP, N_TREAT) + check_basic_property_types_and_shapes( fitted_panel_dml_obj, - n_obs=N_PERIODS * N_OBS, + n_obs=fitted_panel_dml_obj._dml_data.n_obs, n_treat=N_TREAT, n_rep=N_REP, n_folds=N_FOLDS, n_rep_boot=N_REP_BOOT, - score_dim=(N_OBS, N_REP, N_TREAT), + score_dim=actual_score_dim, # Used for psi shape ) - check_basic_predictions_and_targets(fitted_panel_dml_obj, N_OBS, N_TREAT, N_REP) + + check_basic_predictions_and_targets(fitted_panel_dml_obj, fitted_panel_dml_obj.n_obs, N_TREAT, N_REP) @pytest.mark.ci def test_panel_sensitivity_return_types(fitted_panel_dml_obj): if fitted_panel_dml_obj._sensitivity_implemented: benchmarking_set = [fitted_panel_dml_obj._dml_data.x_cols[0]] - check_sensitivity_return_types(fitted_panel_dml_obj, N_OBS, N_REP, N_TREAT, benchmarking_set=benchmarking_set) + check_sensitivity_return_types( + fitted_panel_dml_obj, fitted_panel_dml_obj.n_obs, N_REP, N_TREAT, benchmarking_set=benchmarking_set + ) From bba51605df0716294bef50a87ce557904e46c4e7 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Thu, 12 Jun 2025 17:05:54 +0200 Subject: [PATCH 64/84] adjust unit tests for ssm --- doubleml/irm/tests/test_ssm.py | 4 ++-- doubleml/irm/tests/test_ssm_tune.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/doubleml/irm/tests/test_ssm.py b/doubleml/irm/tests/test_ssm.py index b157794b..c561d9fe 100644 --- a/doubleml/irm/tests/test_ssm.py +++ b/doubleml/irm/tests/test_ssm.py @@ -54,11 +54,11 @@ def dml_selection_fixture( np.random.seed(42) if score == "missing-at-random": - obj_dml_data = dml.DoubleMLData.from_arrays(x, y, d, z=None, s=s) + obj_dml_data = dml.DoubleMLSSMData.from_arrays(x, y, d, z=None, s=s) dml_sel_obj = dml.DoubleMLSSM(obj_dml_data, ml_g, ml_pi, ml_m, n_folds=n_folds, score=score) else: assert score == "nonignorable" - obj_dml_data = dml.DoubleMLData.from_arrays(x, y, d, z=z, s=s) + obj_dml_data = dml.DoubleMLSSMData.from_arrays(x, y, d, z=z, s=s) dml_sel_obj = dml.DoubleMLSSM(obj_dml_data, ml_g, ml_pi, ml_m, n_folds=n_folds, score=score) np.random.seed(42) diff --git a/doubleml/irm/tests/test_ssm_tune.py b/doubleml/irm/tests/test_ssm_tune.py index 0fafbc13..4e48bec3 100644 --- a/doubleml/irm/tests/test_ssm_tune.py +++ b/doubleml/irm/tests/test_ssm_tune.py @@ -76,7 +76,7 @@ def dml_ssm_fixture( np.random.seed(42) if score == "missing-at-random": - obj_dml_data = 
dml.DoubleMLData.from_arrays(x, y, d, z=None, s=s) + obj_dml_data = dml.DoubleMLSSMData.from_arrays(x, y, d, z=None, s=s) dml_sel_obj = dml.DoubleMLSSM( obj_dml_data, ml_g, @@ -89,7 +89,7 @@ def dml_ssm_fixture( ) else: assert score == "nonignorable" - obj_dml_data = dml.DoubleMLData.from_arrays(x, y, d, z=z, s=s) + obj_dml_data = dml.DoubleMLSSMData.from_arrays(x, y, d, z=z, s=s) dml_sel_obj = dml.DoubleMLSSM( obj_dml_data, ml_g, From 96ebd03efa3674506dd859c2eec306cacd6aa36b Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Thu, 12 Jun 2025 17:06:06 +0200 Subject: [PATCH 65/84] adjust unit tests for did --- doubleml/did/tests/test_did_cs_tune.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doubleml/did/tests/test_did_cs_tune.py b/doubleml/did/tests/test_did_cs_tune.py index 5ec33e82..50415937 100644 --- a/doubleml/did/tests/test_did_cs_tune.py +++ b/doubleml/did/tests/test_did_cs_tune.py @@ -67,7 +67,7 @@ def dml_did_cs_fixture(generate_data_did_cs, learner_g, learner_m, score, in_sam all_smpls = draw_smpls(n_obs, n_folds, n_rep=1, groups=d + 2 * t) np.random.seed(3141) - obj_dml_data = dml.DoubleMLData.from_arrays(x, y, d, t=t) + obj_dml_data = dml.DoubleMLDIDData.from_arrays(x, y, d, t=t) dml_did_cs_obj = dml.DoubleMLDIDCS( obj_dml_data, ml_g, From a1686d5c7b4b2d51287f19c3aa2557d866b4e798 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Thu, 12 Jun 2025 17:06:17 +0200 Subject: [PATCH 66/84] adjust unit tests general --- doubleml/tests/test_datasets.py | 2 +- doubleml/tests/test_exceptions.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/doubleml/tests/test_datasets.py b/doubleml/tests/test_datasets.py index 8f1c4f03..aa1d9433 100644 --- a/doubleml/tests/test_datasets.py +++ b/doubleml/tests/test_datasets.py @@ -151,7 +151,7 @@ def test_make_pliv_CHS2015_return_types(): @pytest.mark.ci def test_make_pliv_multiway_cluster_CKMS2021_return_types(): np.random.seed(3141) - res = make_pliv_multiway_cluster_CKMS2021(N=10, M=10, return_type="DoubleMLClusterData") + res = make_pliv_multiway_cluster_CKMS2021(N=10, M=10, return_type="DoubleMLData") assert isinstance(res, DoubleMLClusterData) res = make_pliv_multiway_cluster_CKMS2021(N=10, M=10, return_type="DataFrame") assert isinstance(res, pd.DataFrame) diff --git a/doubleml/tests/test_exceptions.py b/doubleml/tests/test_exceptions.py index d8fe4e7c..5178adc6 100644 --- a/doubleml/tests/test_exceptions.py +++ b/doubleml/tests/test_exceptions.py @@ -8,7 +8,7 @@ from doubleml import ( DoubleMLBLP, - DoubleMLClusterData, + DoubleMLDIDData, DoubleMLCVAR, DoubleMLData, DoubleMLDID, @@ -265,11 +265,11 @@ def test_doubleml_exception_data(): df_did_cs["d"] = df_did_cs["d"] * 2 with pytest.raises(ValueError, match=msg): # non-binary D for DIDCS - _ = DoubleMLDIDCS(DoubleMLData(df_did_cs, y_col="y", d_cols="d", t_col="t"), Lasso(), LogisticRegression()) + _ = DoubleMLDIDCS(DoubleMLDIDData(df_did_cs, y_col="y", d_cols="d", t_col="t"), Lasso(), LogisticRegression()) df_did_cs = dml_data_did_cs.data.copy() with pytest.raises(ValueError, match=msg): # multiple D for DIDCS - _ = DoubleMLDIDCS(DoubleMLData(df_did_cs, y_col="y", d_cols=["d", "Z1"], t_col="t"), Lasso(), LogisticRegression()) + _ = DoubleMLDIDCS(DoubleMLDIDData(df_did_cs, y_col="y", d_cols=["d", "Z1"], t_col="t"), Lasso(), LogisticRegression()) # DIDCS time exceptions msg = ( @@ -280,7 +280,7 @@ def test_doubleml_exception_data(): df_did_cs["t"] = df_did_cs["t"] * 2 with pytest.raises(ValueError, match=msg): # non-binary t for DIDCS - _ 
= DoubleMLDIDCS(DoubleMLData(df_did_cs, y_col="y", d_cols="d", t_col="t"), Lasso(), LogisticRegression()) + _ = DoubleMLDIDCS(DoubleMLDIDData(df_did_cs, y_col="y", d_cols="d", t_col="t"), Lasso(), LogisticRegression()) @pytest.mark.ci From 756092c9ba6b2831d398f2aa79024b2c7eed3b78 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Thu, 12 Jun 2025 17:11:47 +0200 Subject: [PATCH 67/84] adjust unit tests general --- doubleml/tests/test_datasets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doubleml/tests/test_datasets.py b/doubleml/tests/test_datasets.py index aa1d9433..b31091a6 100644 --- a/doubleml/tests/test_datasets.py +++ b/doubleml/tests/test_datasets.py @@ -152,7 +152,7 @@ def test_make_pliv_CHS2015_return_types(): def test_make_pliv_multiway_cluster_CKMS2021_return_types(): np.random.seed(3141) res = make_pliv_multiway_cluster_CKMS2021(N=10, M=10, return_type="DoubleMLData") - assert isinstance(res, DoubleMLClusterData) + assert isinstance(res, DoubleMLData) res = make_pliv_multiway_cluster_CKMS2021(N=10, M=10, return_type="DataFrame") assert isinstance(res, pd.DataFrame) x, y, d, cluster_vars, z = make_pliv_multiway_cluster_CKMS2021(N=10, M=10, return_type="array") From 6bac76e99959d092125abced8caa32ee44bd7c5a Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Fri, 13 Jun 2025 08:34:27 +0200 Subject: [PATCH 68/84] enhance did_multi plotting with anticipation periods and update color palette handling --- doubleml/data/utils/panel_data_utils.py | 50 +++++++++ doubleml/did/did_multi.py | 123 +++++++++++++--------- doubleml/did/tests/test_did_multi_plot.py | 2 +- 3 files changed, 123 insertions(+), 52 deletions(-) diff --git a/doubleml/data/utils/panel_data_utils.py b/doubleml/data/utils/panel_data_utils.py index abd365eb..cc94d39f 100644 --- a/doubleml/data/utils/panel_data_utils.py +++ b/doubleml/data/utils/panel_data_utils.py @@ -1,8 +1,58 @@ +import pandas as pd + valid_datetime_units = {"Y", "M", "D", "h", "m", "s", "ms", "us", "ns"} +# Units that can be used with pd.Timedelta (unambiguous) +timedelta_compatible_units = {"D", "h", "m", "s", "ms", "us", "ns"} + +# Units that require period arithmetic (ambiguous) +period_only_units = {"Y", "M"} + def _is_valid_datetime_unit(unit): if unit not in valid_datetime_units: raise ValueError("Invalid datetime unit.") else: return unit + + +def _is_timedelta_compatible(unit): + """Check if a datetime unit can be used with pd.Timedelta.""" + return unit in timedelta_compatible_units + + +def _subtract_periods_safe(datetime_values, reference_datetime, periods, unit): + """ + Safely subtract periods from datetime values, handling both timedelta-compatible + and period-only units. 
+ + Parameters + ---------- + datetime_values : pandas.Series or numpy.array + Array of datetime values to compare + reference_datetime : datetime-like + Reference datetime to subtract periods from + periods : int + Number of periods to subtract + unit : str + Datetime unit + + Returns + ------- + numpy.array + Boolean array indicating which datetime_values are >= (reference_datetime - periods) + """ + if periods == 0: + # No anticipation periods, so no datetime arithmetic needed + return datetime_values >= reference_datetime + + if _is_timedelta_compatible(unit): + # Use Timedelta for unambiguous units + period_offset = pd.Timedelta(periods, unit=unit) + return datetime_values >= (reference_datetime - period_offset) + else: + # Use Period arithmetic for ambiguous units like 'M' and 'Y' + ref_period = pd.Period(reference_datetime, freq=unit) + ref_minus_periods = ref_period - periods + datetime_periods = pd.PeriodIndex(datetime_values, freq=unit) + return datetime_periods >= ref_minus_periods diff --git a/doubleml/did/did_multi.py b/doubleml/did/did_multi.py index 646ad41d..cdfe0756 100644 --- a/doubleml/did/did_multi.py +++ b/doubleml/did/did_multi.py @@ -10,6 +10,7 @@ from sklearn.base import clone from doubleml.data import DoubleMLPanelData +from doubleml.data.utils.panel_data_utils import _subtract_periods_safe from doubleml.did.did_aggregation import DoubleMLDIDAggregation from doubleml.did.did_binary import DoubleMLDIDBinary from doubleml.did.did_cs_binary import DoubleMLDIDCSBinary @@ -989,8 +990,9 @@ def plot_effects( first_treated_periods = sorted(df["First Treated"].unique()) n_periods = len(first_treated_periods) - # Set up colors - colors = dict(zip(["pre", "post"], sns.color_palette(color_palette)[:2])) + # Set up colors - ensure 'post' always gets the second color + palette_colors = sns.color_palette(color_palette) + colors = {"pre": palette_colors[0], "post": palette_colors[1], "anticipation": palette_colors[2]} # Check if x-axis is datetime or convert to float is_datetime = pd.api.types.is_datetime64_any_dtype(df["Evaluation Period"]) @@ -1034,9 +1036,20 @@ def plot_effects( Line2D([0], [0], color="red", linestyle=":", alpha=0.7, label="Treatment start"), Line2D([0], [0], color="black", linestyle="--", alpha=0.5, label="Zero effect"), Line2D([0], [0], marker="o", color=colors["pre"], linestyle="None", label="Pre-treatment", markersize=5), - Line2D([0], [0], marker="o", color=colors["post"], linestyle="None", label="Post-treatment", markersize=5), ] - legend_ax.legend(handles=legend_elements, loc="center", ncol=4, mode="expand", borderaxespad=0.0) + + if self.anticipation_periods > 0: + legend_elements.append( + Line2D( + [0], [0], marker="o", color=colors["anticipation"], linestyle="None", label="Anticipation", markersize=5 + ) + ) + + legend_elements.append( + Line2D([0], [0], marker="o", color=colors["post"], linestyle="None", label="Post-treatment", markersize=5) + ) + + legend_ax.legend(handles=legend_elements, loc="center", ncol=len(legend_elements), mode="expand", borderaxespad=0.0) # Set title and layout plt.suptitle(title, y=1.02) @@ -1057,7 +1070,7 @@ def _plot_single_group(self, ax, period_df, period, colors, is_datetime, jitter_ period : int or datetime Treatment period for this group. colors : dict - Dictionary with 'pre' and 'post' color values. + Dictionary with 'pre', 'anticipation' (if applicable), and 'post' color values. is_datetime : bool Whether the x-axis represents datetime values. 
jitter_value : float @@ -1074,56 +1087,64 @@ def _plot_single_group(self, ax, period_df, period, colors, is_datetime, jitter_ ax.axvline(x=period, color="red", linestyle=":", alpha=0.7) ax.axhline(y=0, color="black", linestyle="--", alpha=0.5) - # Split and jitter data - pre_treatment = add_jitter( - period_df[period_df["Pre-Treatment"]], - "Evaluation Period", - is_datetime=is_datetime, - jitter_value=jitter_value, - ) - post_treatment = add_jitter( - period_df[~period_df["Pre-Treatment"]], - "Evaluation Period", - is_datetime=is_datetime, - jitter_value=jitter_value, - ) - - # Plot pre-treatment points - if not pre_treatment.empty: - ax.scatter(pre_treatment["jittered_x"], pre_treatment["Estimate"], color=colors["pre"], alpha=0.8, s=30) - ax.errorbar( - pre_treatment["jittered_x"], - pre_treatment["Estimate"], - yerr=[ - pre_treatment["Estimate"] - pre_treatment["CI Lower"], - pre_treatment["CI Upper"] - pre_treatment["Estimate"], - ], - fmt="o", - capsize=3, - color=colors["pre"], - markersize=4, - markeredgewidth=1, - linewidth=1, + # Categorize periods + if is_datetime: + # For datetime, use safe period arithmetic that handles both timedelta-compatible and period-only units + anticipation_ge_mask = _subtract_periods_safe( + period_df["Evaluation Period"], period, self.anticipation_periods, self._dml_data.datetime_unit ) + anticipation_mask = ( + (self.anticipation_periods > 0) + & period_df["Pre-Treatment"] + & anticipation_ge_mask + & (period_df["Evaluation Period"] < period) + ) + else: + # For numeric periods, simple arithmetic works + anticipation_mask = ( + (self.anticipation_periods > 0) + & period_df["Pre-Treatment"] + & (period_df["Evaluation Period"] >= period - self.anticipation_periods) + & (period_df["Evaluation Period"] < period) + ) + + pre_treatment_mask = period_df["Pre-Treatment"] & ~anticipation_mask + post_treatment_mask = ~period_df["Pre-Treatment"] + + # Define category mappings + categories = [("pre", pre_treatment_mask), ("anticipation", anticipation_mask), ("post", post_treatment_mask)] - # Plot post-treatment points - if not post_treatment.empty: - ax.scatter(post_treatment["jittered_x"], post_treatment["Estimate"], color=colors["post"], alpha=0.8, s=30) - ax.errorbar( - post_treatment["jittered_x"], - post_treatment["Estimate"], - yerr=[ - post_treatment["Estimate"] - post_treatment["CI Lower"], - post_treatment["CI Upper"] - post_treatment["Estimate"], - ], - fmt="o", - capsize=3, - color=colors["post"], - markersize=4, - markeredgewidth=1, - linewidth=1, + # Plot each category + for category_name, mask in categories: + if not mask.any(): + continue + + category_data = add_jitter( + period_df[mask], + "Evaluation Period", + is_datetime=is_datetime, + jitter_value=jitter_value, ) + if not category_data.empty: + ax.scatter( + category_data["jittered_x"], category_data["Estimate"], color=colors[category_name], alpha=0.8, s=30 + ) + ax.errorbar( + category_data["jittered_x"], + category_data["Estimate"], + yerr=[ + category_data["Estimate"] - category_data["CI Lower"], + category_data["CI Upper"] - category_data["Estimate"], + ], + fmt="o", + capsize=3, + color=colors[category_name], + markersize=4, + markeredgewidth=1, + linewidth=1, + ) + # Format axes if is_datetime: period_str = np.datetime64(period, self._dml_data.datetime_unit) diff --git a/doubleml/did/tests/test_did_multi_plot.py b/doubleml/did/tests/test_did_multi_plot.py index bcb8b786..5bcd0aae 100644 --- a/doubleml/did/tests/test_did_multi_plot.py +++ b/doubleml/did/tests/test_did_multi_plot.py @@ 
-130,7 +130,7 @@ def test_plot_effects_color_palette(doubleml_did_fixture): assert isinstance(fig, plt.Figure) # Test with a custom color list - custom_colors = [(1, 0, 0), (0, 1, 0)] # Red and green + custom_colors = [(1, 0, 0), (0, 1, 0), (0, 0, 1)] # Red, Green, Blue fig, _ = dml_obj.plot_effects(color_palette=custom_colors) assert isinstance(fig, plt.Figure) From 77b1a6b53634841e90bb6f2fe848bddad2c038cd Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Fri, 13 Jun 2025 10:19:35 +0200 Subject: [PATCH 69/84] update data summary to include unique IDs count in DoubleMLPanelData --- doubleml/data/panel_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doubleml/data/panel_data.py b/doubleml/data/panel_data.py index 59ad531c..4ba659ce 100644 --- a/doubleml/data/panel_data.py +++ b/doubleml/data/panel_data.py @@ -141,7 +141,7 @@ def _data_summary_str(self): f"Id variable: {self.id_col}\n" ) - data_summary += f"No. Observations: {self.n_obs}\n" + data_summary += f"No. Unique Ids: {self.n_ids}\n" f"No. Observations: {self.n_obs}\n" return data_summary @classmethod From e52122f348e222b06b581df323c306825e3fb108 Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Mon, 16 Jun 2025 11:04:59 +0200 Subject: [PATCH 70/84] add flexible summary with multiple formats --- doubleml/did/did_binary.py | 63 ++++---------------- doubleml/did/did_cs_binary.py | 65 ++++---------------- doubleml/double_ml.py | 109 +++++++++++++++++++++++----------- doubleml/irm/iivm.py | 15 +---- 4 files changed, 100 insertions(+), 152 deletions(-) diff --git a/doubleml/did/did_binary.py b/doubleml/did/did_binary.py index 99e18e28..99ce7ef9 100644 --- a/doubleml/did/did_binary.py +++ b/doubleml/did/did_binary.py @@ -239,58 +239,17 @@ def __init__( self._sensitivity_implemented = True self._external_predictions_implemented = True - def __str__(self): - class_name = self.__class__.__name__ - header = f"================== {class_name} Object ==================\n" - data_summary = self._dml_data._data_summary_str() - score_info = ( - f"Score function: {str(self.score)}\n" - f"Treatment group: {str(self.g_value)}\n" - f"Pre-treatment period: {str(self.t_value_pre)}\n" - f"Evaluation period: {str(self.t_value_eval)}\n" - f"Control group: {str(self.control_group)}\n" - f"Anticipation periods: {str(self.anticipation_periods)}\n" - f"Effective sample size: {str(self.n_obs_subset)}\n" - ) - learner_info = "" - for key, value in self.learner.items(): - learner_info += f"Learner {key}: {str(value)}\n" - if self.nuisance_loss is not None: - learner_info += "Out-of-sample Performance:\n" - is_classifier = [value for value in self._is_classifier.values()] - is_regressor = [not value for value in is_classifier] - if any(is_regressor): - learner_info += "Regression:\n" - for learner in [key for key, value in self._is_classifier.items() if value is False]: - learner_info += f"Learner {learner} RMSE: {self.nuisance_loss[learner]}\n" - if any(is_classifier): - learner_info += "Classification:\n" - for learner in [key for key, value in self._is_classifier.items() if value is True]: - learner_info += f"Learner {learner} Log Loss: {self.nuisance_loss[learner]}\n" - - if self._is_cluster_data: - resampling_info = ( - f"No. folds per cluster: {self._n_folds_per_cluster}\n" - f"No. folds: {self.n_folds}\n" - f"No. repeated sample splits: {self.n_rep}\n" - ) - else: - resampling_info = f"No. folds: {self.n_folds}\nNo. 
repeated sample splits: {self.n_rep}\n" - fit_summary = str(self.summary) - res = ( - header - + "\n------------------ Data summary ------------------\n" - + data_summary - + "\n------------------ Score & algorithm ------------------\n" - + score_info - + "\n------------------ Machine learner ------------------\n" - + learner_info - + "\n------------------ Resampling ------------------\n" - + resampling_info - + "\n------------------ Fit summary ------------------\n" - + fit_summary - ) - return res + def _format_score_info_str(self): + lines = [ + f"Score function: {str(self.score)}", + f"Treatment group: {str(self.g_value)}", + f"Pre-treatment period: {str(self.t_value_pre)}", + f"Evaluation period: {str(self.t_value_eval)}", + f"Control group: {str(self.control_group)}", + f"Anticipation periods: {str(self.anticipation_periods)}", + f"Effective sample size: {str(self.n_obs_subset)}", + ] + return "\\n".join(lines) @property def g_value(self): diff --git a/doubleml/did/did_cs_binary.py b/doubleml/did/did_cs_binary.py index a6005d53..73b9152f 100644 --- a/doubleml/did/did_cs_binary.py +++ b/doubleml/did/did_cs_binary.py @@ -156,58 +156,19 @@ def __init__( self._sensitivity_implemented = True self._external_predictions_implemented = True - def __str__(self): - class_name = self.__class__.__name__ - header = f"================== {class_name} Object ==================\n" - data_summary = self._dml_data._data_summary_str() - score_info = ( - f"Score function: {str(self.score)}\n" - f"Treatment group: {str(self.g_value)}\n" - f"Pre-treatment period: {str(self.t_value_pre)}\n" - f"Evaluation period: {str(self.t_value_eval)}\n" - f"Control group: {str(self.control_group)}\n" - f"Anticipation periods: {str(self.anticipation_periods)}\n" - f"Effective sample size: {str(self.n_obs_subset)}\n" - ) - learner_info = "" - for key, value in self.learner.items(): - learner_info += f"Learner {key}: {str(value)}\n" - if self.nuisance_loss is not None: - learner_info += "Out-of-sample Performance:\n" - is_classifier = [value for value in self._is_classifier.values()] - is_regressor = [not value for value in is_classifier] - if any(is_regressor): - learner_info += "Regression:\n" - for learner in [key for key, value in self._is_classifier.items() if value is False]: - learner_info += f"Learner {learner} RMSE: {self.nuisance_loss[learner]}\n" - if any(is_classifier): - learner_info += "Classification:\n" - for learner in [key for key, value in self._is_classifier.items() if value is True]: - learner_info += f"Learner {learner} Log Loss: {self.nuisance_loss[learner]}\n" - - if self._is_cluster_data: - resampling_info = ( - f"No. folds per cluster: {self._n_folds_per_cluster}\n" - f"No. folds: {self.n_folds}\n" - f"No. repeated sample splits: {self.n_rep}\n" - ) - else: - resampling_info = f"No. folds: {self.n_folds}\nNo. 
repeated sample splits: {self.n_rep}\n" - fit_summary = str(self.summary) - res = ( - header - + "\n------------------ Data summary ------------------\n" - + data_summary - + "\n------------------ Score & algorithm ------------------\n" - + score_info - + "\n------------------ Machine learner ------------------\n" - + learner_info - + "\n------------------ Resampling ------------------\n" - + resampling_info - + "\n------------------ Fit summary ------------------\n" - + fit_summary - ) - return res + def _format_score_info_str(self): + lines = [ + f"Score function: {str(self.score)}", + f"Treatment group: {str(self.g_value)}", + f"Pre-treatment period: {str(self.t_value_pre)}", + f"Evaluation period: {str(self.t_value_eval)}", + f"Control group: {str(self.control_group)}", + f"Anticipation periods: {str(self.anticipation_periods)}", + f"Effective sample size: {str(self.n_obs_subset)}", + ] + return "\n".join(lines) + + # _format_learner_info_str method is inherited from DoubleML base class. @property def g_value(self): diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index 88f677ef..72f3b44a 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -110,50 +110,87 @@ def __init__(self, obj_dml_data, n_folds, n_rep, score, draw_sample_splitting): self._i_rep = None self._i_treat = None - def __str__(self): + def _format_header_str(self): class_name = self.__class__.__name__ - header = f"================== {class_name} Object ==================\n" - data_summary = self._dml_data._data_summary_str() - score_info = f"Score function: {str(self.score)}\n" + return f"================== {class_name} Object ==================" + + def _format_score_info_str(self): + return f"Score function: {str(self.score)}" + + def _format_learner_info_str(self): learner_info = "" - for key, value in self.learner.items(): - learner_info += f"Learner {key}: {str(value)}\n" + if self.learner is not None: + for key, value in self.learner.items(): + learner_info += f"Learner {key}: {str(value)}\\n" if self.nuisance_loss is not None: - learner_info += "Out-of-sample Performance:\n" - is_classifier = [value for value in self._is_classifier.values()] - is_regressor = [not value for value in is_classifier] - if any(is_regressor): - learner_info += "Regression:\n" - for learner in [key for key, value in self._is_classifier.items() if value is False]: - learner_info += f"Learner {learner} RMSE: {self.nuisance_loss[learner]}\n" - if any(is_classifier): - learner_info += "Classification:\n" - for learner in [key for key, value in self._is_classifier.items() if value is True]: - learner_info += f"Learner {learner} Log Loss: {self.nuisance_loss[learner]}\n" + learner_info += "Out-of-sample Performance:\\n" + # Check if _is_classifier is populated, otherwise, it might be called before fit + if self._is_classifier: + is_classifier_any = any(self._is_classifier.values()) + is_regressor_any = any(not v for v in self._is_classifier.values()) + + if is_regressor_any: + learner_info += "Regression:\\n" + for learner_name in self.params_names: # Iterate through known learners + if not self._is_classifier.get(learner_name, True): # Default to not regressor if not found + loss_val = self.nuisance_loss.get(learner_name, "N/A") + learner_info += f"Learner {learner_name} RMSE: {loss_val}\\n" + if is_classifier_any: + learner_info += "Classification:\\n" + for learner_name in self.params_names: # Iterate through known learners + if self._is_classifier.get(learner_name, False): # Default to not classifier if not found + 
loss_val = self.nuisance_loss.get(learner_name, "N/A") + learner_info += f"Learner {learner_name} Log Loss: {loss_val}\\n" + else: + learner_info += " (Run .fit() to see out-of-sample performance)\\n" + return learner_info.strip() + def _format_resampling_info_str(self): if self._is_cluster_data: - resampling_info = ( - f"No. folds per cluster: {self._n_folds_per_cluster}\n" - f"No. folds: {self.n_folds}\n" - f"No. repeated sample splits: {self.n_rep}\n" + return ( + f"No. folds per cluster: {self._n_folds_per_cluster}\\\\n" + f"No. folds: {self.n_folds}\\\\n" + f"No. repeated sample splits: {self.n_rep}" ) else: - resampling_info = f"No. folds: {self.n_folds}\nNo. repeated sample splits: {self.n_rep}\n" - fit_summary = str(self.summary) - res = ( - header - + "\n------------------ Data summary ------------------\n" - + data_summary - + "\n------------------ Score & algorithm ------------------\n" - + score_info - + "\n------------------ Machine learner ------------------\n" - + learner_info - + "\n------------------ Resampling ------------------\n" - + resampling_info - + "\n------------------ Fit summary ------------------\n" - + fit_summary + return f"No. folds: {self.n_folds}\\\\nNo. repeated sample splits: {self.n_rep}" + + def _format_additional_info_str(self): + """ + Hook for subclasses to add additional information to the string representation. + Returns an empty string by default. + Subclasses should override this method to provide content. + The content should not include the 'Additional Information' header itself. + """ + return "" + + def __str__(self): + header = self._format_header_str() + # Assumes self._dml_data._data_summary_str() exists and is well-formed + data_summary = self._dml_data._data_summary_str() + score_info = self._format_score_info_str() + learner_info = self._format_learner_info_str() + resampling_info = self._format_resampling_info_str() + fit_summary = str(self.summary) # Assumes self.summary is well-formed + + representation = ( + f"{header}\\n" + f"\\n------------------ Data Summary ------------------\\n" + f"{data_summary}\\n" + f"\\n------------------ Score & Algorithm ------------------\\n" + f"{score_info}\\n" + f"\\n------------------ Machine Learner ------------------\\n" + f"{learner_info}\\n" + f"\\n------------------ Resampling ------------------\\n" + f"{resampling_info}\\n" + f"\\n------------------ Fit Summary ------------------\\n" + f"{fit_summary}" ) - return res + + additional_info = self._format_additional_info_str() + if additional_info: + representation += f"\\n\\n------------------ Additional Information ------------------\\n" f"{additional_info}" + return representation @property def n_folds(self): diff --git a/doubleml/irm/iivm.py b/doubleml/irm/iivm.py index a43c0a03..b3cc11e7 100644 --- a/doubleml/irm/iivm.py +++ b/doubleml/irm/iivm.py @@ -197,22 +197,13 @@ def __init__( self.subgroups = subgroups self._external_predictions_implemented = True - def __str__(self): - parent_str = super().__str__() - - # add robust confset + def _format_additional_info_str(self): if self.framework is None: - confset_str = "" + return "" else: confset = self.robust_confset() formatted_confset = ", ".join([f"[{lower:.4f}, {upper:.4f}]" for lower, upper in confset]) - confset_str = ( - "\n\n--------------- Additional Information ----------------\n" - + f"Robust Confidence Set: {formatted_confset}\n" - ) - - res = parent_str + confset_str - return res + return f"Robust Confidence Set: {formatted_confset}" @property def normalize_ipw(self): From 
bf7e16af8a6b3dde11f7fd80c76549659b1e11a7 Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Mon, 16 Jun 2025 12:09:09 +0200 Subject: [PATCH 71/84] fix format --- doubleml/double_ml.py | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index 72f3b44a..694968bc 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -121,39 +121,39 @@ def _format_learner_info_str(self): learner_info = "" if self.learner is not None: for key, value in self.learner.items(): - learner_info += f"Learner {key}: {str(value)}\\n" + learner_info += f"Learner {key}: {str(value)}\n" if self.nuisance_loss is not None: - learner_info += "Out-of-sample Performance:\\n" + learner_info += "Out-of-sample Performance:\n" # Check if _is_classifier is populated, otherwise, it might be called before fit if self._is_classifier: is_classifier_any = any(self._is_classifier.values()) is_regressor_any = any(not v for v in self._is_classifier.values()) if is_regressor_any: - learner_info += "Regression:\\n" + learner_info += "Regression:\n" for learner_name in self.params_names: # Iterate through known learners if not self._is_classifier.get(learner_name, True): # Default to not regressor if not found loss_val = self.nuisance_loss.get(learner_name, "N/A") - learner_info += f"Learner {learner_name} RMSE: {loss_val}\\n" + learner_info += f"Learner {learner_name} RMSE: {loss_val}\n" if is_classifier_any: - learner_info += "Classification:\\n" + learner_info += "Classification:\n" for learner_name in self.params_names: # Iterate through known learners if self._is_classifier.get(learner_name, False): # Default to not classifier if not found loss_val = self.nuisance_loss.get(learner_name, "N/A") - learner_info += f"Learner {learner_name} Log Loss: {loss_val}\\n" + learner_info += f"Learner {learner_name} Log Loss: {loss_val}\n" else: - learner_info += " (Run .fit() to see out-of-sample performance)\\n" + learner_info += " (Run .fit() to see out-of-sample performance)\n" return learner_info.strip() def _format_resampling_info_str(self): if self._is_cluster_data: return ( - f"No. folds per cluster: {self._n_folds_per_cluster}\\\\n" - f"No. folds: {self.n_folds}\\\\n" + f"No. folds per cluster: {self._n_folds_per_cluster}\n" + f"No. folds: {self.n_folds}\n" f"No. repeated sample splits: {self.n_rep}" ) else: - return f"No. folds: {self.n_folds}\\\\nNo. repeated sample splits: {self.n_rep}" + return f"No. folds: {self.n_folds}\nNo. 
repeated sample splits: {self.n_rep}" def _format_additional_info_str(self): """ @@ -174,22 +174,22 @@ def __str__(self): fit_summary = str(self.summary) # Assumes self.summary is well-formed representation = ( - f"{header}\\n" - f"\\n------------------ Data Summary ------------------\\n" - f"{data_summary}\\n" - f"\\n------------------ Score & Algorithm ------------------\\n" - f"{score_info}\\n" - f"\\n------------------ Machine Learner ------------------\\n" - f"{learner_info}\\n" - f"\\n------------------ Resampling ------------------\\n" - f"{resampling_info}\\n" - f"\\n------------------ Fit Summary ------------------\\n" + f"{header}\n" + f"\n------------------ Data Summary ------------------\n" + f"{data_summary}\n" + f"\n------------------ Score & Algorithm ------------------\n" + f"{score_info}\n" + f"\n------------------ Machine Learner ------------------\n" + f"{learner_info}\n" + f"\n------------------ Resampling ------------------\n" + f"{resampling_info}\n" + f"\n------------------ Fit Summary ------------------\n" f"{fit_summary}" ) additional_info = self._format_additional_info_str() if additional_info: - representation += f"\\n\\n------------------ Additional Information ------------------\\n" f"{additional_info}" + representation += f"\n\n------------------ Additional Information ------------------\n" f"{additional_info}" return representation @property From 6beebd83fd27b79aa445a348c96f44007bb8541f Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Tue, 17 Jun 2025 12:32:13 +0200 Subject: [PATCH 72/84] fix unit tests --- doubleml/did/tests/test_did_external_predictions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doubleml/did/tests/test_did_external_predictions.py b/doubleml/did/tests/test_did_external_predictions.py index 7234be8e..194db374 100644 --- a/doubleml/did/tests/test_did_external_predictions.py +++ b/doubleml/did/tests/test_did_external_predictions.py @@ -24,7 +24,7 @@ def n_rep(request): @pytest.fixture(scope="module") def doubleml_did_fixture(did_score, n_rep): ext_predictions = {"d": {}} - dml_data = make_did_SZ2020(n_obs=500, return_type="DoubleMLData") + dml_data = make_did_SZ2020(n_obs=500, return_type="DoubleMLDIDData") all_smpls = draw_smpls(len(dml_data.y), 5, n_rep=n_rep, groups=dml_data.d) kwargs = {"obj_dml_data": dml_data, "score": did_score, "n_rep": n_rep, "draw_sample_splitting": False} dml_did = DoubleMLDID(ml_g=LinearRegression(), ml_m=LogisticRegression(), **kwargs) From fb421f7d6ba0009ae33d193d4dd0ae2dd4e3b849 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Tue, 17 Jun 2025 12:46:36 +0200 Subject: [PATCH 73/84] adjust workflow in parent class `DoubleML` --- doubleml/double_ml.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index fe4cec5d..6d2f2ca4 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -7,7 +7,7 @@ from scipy.stats import norm from sklearn.base import is_classifier, is_regressor -from doubleml.data import DoubleMLClusterData, DoubleMLPanelData +from doubleml.data import DoubleMLPanelData, DoubleMLDIDData, DoubleMLSSMData, DoubleMLRDDData from doubleml.data.base_data import DoubleMLBaseData from doubleml.double_ml_framework import DoubleMLFramework from doubleml.utils._checks import _check_external_predictions, _check_sample_splitting @@ -30,13 +30,22 @@ def __init__(self, obj_dml_data, n_folds, n_rep, score, draw_sample_splitting): f"{str(obj_dml_data)} of type {str(type(obj_dml_data))} was 
passed." ) self._is_cluster_data = False - if isinstance(obj_dml_data, DoubleMLClusterData): + if obj_dml_data.is_cluster_data: if obj_dml_data.n_cluster_vars > 2: raise NotImplementedError("Multi-way (n_ways > 2) clustering not yet implemented.") self._is_cluster_data = True self._is_panel_data = False if isinstance(obj_dml_data, DoubleMLPanelData): self._is_panel_data = True + self._is_did_data = False + if isinstance(obj_dml_data, DoubleMLDIDData): + self._is_did_data = True + self._is_ssm_data = False + if isinstance(obj_dml_data, DoubleMLSSMData): + self._is_ssm_data = True + self._is_rdd_data = False + if isinstance(obj_dml_data, DoubleMLRDDData): + self._is_rdd_data = True self._dml_data = obj_dml_data self._n_obs = self._dml_data.n_obs From b11c0cbce798c6f8787bd5a0e7812b98b42c382a Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Tue, 17 Jun 2025 13:30:24 +0200 Subject: [PATCH 74/84] update refactoring acc. to unit test results --- doubleml/double_ml.py | 2 +- doubleml/irm/apos.py | 6 +++--- doubleml/irm/qte.py | 5 ++--- doubleml/irm/ssm.py | 14 +++++++------- doubleml/irm/tests/test_apo_exceptions.py | 2 +- doubleml/irm/tests/test_apos_exceptions.py | 2 +- doubleml/irm/tests/test_ssm_exceptions.py | 2 +- .../datasets/dgp_pliv_multiway_cluster_CKMS2021.py | 4 ++-- doubleml/rdd/rdd.py | 14 +++++++------- doubleml/tests/test_datasets.py | 2 +- doubleml/tests/test_exceptions.py | 5 +++-- doubleml/tests/test_multiway_cluster.py | 2 +- doubleml/tests/test_nonlinear_cluster.py | 8 ++++---- doubleml/tests/test_return_types.py | 6 +++--- doubleml/tests/test_sensitivity_cluster.py | 4 ++-- doubleml/utils/_check_return_types.py | 5 ++--- 16 files changed, 41 insertions(+), 42 deletions(-) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index 6d2f2ca4..818bb3ab 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -16,7 +16,7 @@ from doubleml.utils.gain_statistics import gain_statistics from doubleml.utils.resampling import DoubleMLClusterResampling, DoubleMLResampling -_implemented_data_backends = ["DoubleMLData", "DoubleMLClusterData"] +_implemented_data_backends = ["DoubleMLData", "DoubleMLClusterData", "DoubleMLDIDData", "DoubleMLSSMData", "DoubleMLRDDData"] class DoubleML(ABC): diff --git a/doubleml/irm/apos.py b/doubleml/irm/apos.py index 2960e90d..c272d0b4 100644 --- a/doubleml/irm/apos.py +++ b/doubleml/irm/apos.py @@ -6,7 +6,7 @@ from joblib import Parallel, delayed from sklearn.base import clone -from doubleml.data import DoubleMLClusterData, DoubleMLData +from doubleml.data import DoubleMLData from doubleml.double_ml import DoubleML from doubleml.double_ml_framework import concat from doubleml.irm.apo import DoubleMLAPO @@ -36,7 +36,7 @@ def __init__( draw_sample_splitting=True, ): self._dml_data = obj_dml_data - self._is_cluster_data = isinstance(obj_dml_data, DoubleMLClusterData) + self._is_cluster_data = obj_dml_data.is_cluster_data self._check_data(self._dml_data) self._all_treatment_levels = np.unique(self._dml_data.d) @@ -824,7 +824,7 @@ def _check_treatment_levels(self, treatment_levels): def _check_data(self, obj_dml_data): if not isinstance(obj_dml_data, DoubleMLData): - raise TypeError("The data must be of DoubleMLData or DoubleMLClusterData type.") + raise TypeError("The data must be of DoubleMLData type.") if obj_dml_data.z is not None: raise ValueError("The data must not contain instrumental variables.") return diff --git a/doubleml/irm/qte.py b/doubleml/irm/qte.py index a2c803a3..de25b3ef 100644 --- a/doubleml/irm/qte.py +++ 
b/doubleml/irm/qte.py @@ -3,7 +3,7 @@ from joblib import Parallel, delayed from sklearn.base import clone -from doubleml.data import DoubleMLClusterData, DoubleMLData +from doubleml.data import DoubleMLData from doubleml.double_ml_framework import concat from doubleml.irm.cvar import DoubleMLCVAR from doubleml.irm.lpq import DoubleMLLPQ @@ -125,8 +125,7 @@ def __init__( # check data self._is_cluster_data = False - if isinstance(obj_dml_data, DoubleMLClusterData): - self._is_cluster_data = True + self._is_cluster_data = obj_dml_data.is_cluster_data self._check_data(self._dml_data) # initialize framework which is constructed after the fit method is called diff --git a/doubleml/irm/ssm.py b/doubleml/irm/ssm.py index c84b326d..2c8479a7 100644 --- a/doubleml/irm/ssm.py +++ b/doubleml/irm/ssm.py @@ -6,7 +6,7 @@ from sklearn.model_selection import train_test_split from sklearn.utils import check_X_y -from doubleml.data.base_data import DoubleMLData +from doubleml.data.ssm_data import DoubleMLSSMData from doubleml.double_ml import DoubleML from doubleml.double_ml_score_mixins import LinearScoreMixin from doubleml.utils._checks import _check_finite_predictions, _check_score, _check_trimming @@ -19,8 +19,8 @@ class DoubleMLSSM(LinearScoreMixin, DoubleML): Parameters ---------- - obj_dml_data : :class:`DoubleMLData` object - The :class:`DoubleMLData` object providing the data and specifying the variables for the causal model. + obj_dml_data : :class:`DoubleMLSSMData` object + The :class:`DoubleMLSSMData` object providing the data and specifying the variables for the causal model. ml_g : estimator implementing ``fit()`` and ``predict()`` A machine learner implementing ``fit()`` and ``predict()`` methods (e.g. @@ -66,7 +66,7 @@ class DoubleMLSSM(LinearScoreMixin, DoubleML): -------- >>> import numpy as np >>> import doubleml as dml - >>> from doubleml import DoubleMLData + >>> from doubleml import DoubleMLSSMData >>> from sklearn.linear_model import LassoCV, LogisticRegressionCV() >>> from sklearn.base import clone >>> np.random.seed(3146) @@ -82,7 +82,7 @@ class DoubleMLSSM(LinearScoreMixin, DoubleML): >>> s = np.where(np.dot(X, beta) + 0.25 * d + z + e[0] > 0, 1, 0) >>> y = np.dot(X, beta) + 0.5 * d + e[1] >>> y[s == 0] = 0 - >>> simul_data = DoubleMLData.from_arrays(X, y, d, z=None, t=s) + >>> simul_data = DoubleMLSSMData.from_arrays(X, y, d, z=None, s=s) >>> learner = LassoCV() >>> learner_class = LogisticRegressionCV() >>> ml_g_sim = clone(learner) @@ -183,9 +183,9 @@ def _initialize_ml_nuisance_params(self): self._params = {learner: {key: [None] * self.n_rep for key in self._dml_data.d_cols} for learner in valid_learner} def _check_data(self, obj_dml_data): - if not isinstance(obj_dml_data, DoubleMLData): + if not isinstance(obj_dml_data, DoubleMLSSMData): raise TypeError( - f"The data must be of DoubleMLData type. {str(obj_dml_data)} of type {str(type(obj_dml_data))} was passed." + f"The data must be of DoubleMLSSMData type. {str(obj_dml_data)} of type {str(type(obj_dml_data))} was passed." ) if obj_dml_data.z_cols is not None and self._score == "missing-at-random": warnings.warn( diff --git a/doubleml/irm/tests/test_apo_exceptions.py b/doubleml/irm/tests/test_apo_exceptions.py index e643efca..cb267a98 100644 --- a/doubleml/irm/tests/test_apo_exceptions.py +++ b/doubleml/irm/tests/test_apo_exceptions.py @@ -22,7 +22,7 @@ @pytest.mark.ci def test_apo_exception_data(): - msg = "The data must be of DoubleMLData or DoubleMLClusterData type." + msg = "The data must be of DoubleMLData type." 
with pytest.raises(TypeError, match=msg): _ = DoubleMLAPO(pd.DataFrame(), ml_g, ml_m, treatment_level=0) diff --git a/doubleml/irm/tests/test_apos_exceptions.py b/doubleml/irm/tests/test_apos_exceptions.py index f1c9b3d6..93274cee 100644 --- a/doubleml/irm/tests/test_apos_exceptions.py +++ b/doubleml/irm/tests/test_apos_exceptions.py @@ -20,7 +20,7 @@ @pytest.mark.ci def test_apos_exception_data(): - msg = "The data must be of DoubleMLData or DoubleMLClusterData type." + msg = "The data must be of DoubleMLData type." with pytest.raises(TypeError, match=msg): _ = DoubleMLAPOS(pd.DataFrame(), ml_g, ml_m, treatment_levels=0) diff --git a/doubleml/irm/tests/test_ssm_exceptions.py b/doubleml/irm/tests/test_ssm_exceptions.py index 50b082ec..ee67dbec 100644 --- a/doubleml/irm/tests/test_ssm_exceptions.py +++ b/doubleml/irm/tests/test_ssm_exceptions.py @@ -30,7 +30,7 @@ def n_coefs(self): @pytest.mark.ci def test_ssm_exception_data(): - msg = "The data must be of DoubleMLData or DoubleMLClusterData type." + msg = "The data must be of DoubleMLData type." with pytest.raises(TypeError, match=msg): _ = DoubleMLSSM(pd.DataFrame(), ml_g, ml_pi, ml_m) diff --git a/doubleml/plm/datasets/dgp_pliv_multiway_cluster_CKMS2021.py b/doubleml/plm/datasets/dgp_pliv_multiway_cluster_CKMS2021.py index 39ff6a26..0d64c42f 100644 --- a/doubleml/plm/datasets/dgp_pliv_multiway_cluster_CKMS2021.py +++ b/doubleml/plm/datasets/dgp_pliv_multiway_cluster_CKMS2021.py @@ -62,8 +62,8 @@ def make_pliv_multiway_cluster_CKMS2021(N=25, M=25, dim_X=100, theta=1.0, return theta : The value of the causal parameter. return_type : - If ``'DoubleMLClusterData'`` or ``DoubleMLClusterData``, returns a ``DoubleMLClusterData`` object where - ``DoubleMLClusterData.data`` is a ``pd.DataFrame``. + If ``'DoubleMLData'`` or ``DoubleMLData``, returns a ``DoubleMLData`` object where + ``DoubleMLData.data`` is a ``pd.DataFrame``. If ``'DataFrame'``, ``'pd.DataFrame'`` or ``pd.DataFrame``, returns a ``pd.DataFrame``. diff --git a/doubleml/rdd/rdd.py b/doubleml/rdd/rdd.py index 858ae5ed..565f0241 100644 --- a/doubleml/rdd/rdd.py +++ b/doubleml/rdd/rdd.py @@ -7,7 +7,7 @@ from sklearn.base import clone from sklearn.utils.multiclass import type_of_target -from doubleml import DoubleMLData +from doubleml import DoubleMLRDDData from doubleml.double_ml import DoubleML from doubleml.rdd._utils import _is_rdrobust_available from doubleml.utils._checks import _check_resampling_specification, _check_supports_sample_weights @@ -82,7 +82,7 @@ class RDFlex: >>> from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier >>> np.random.seed(123) >>> data_dict = make_simple_rdd_data(fuzzy=True) - >>> obj_dml_data = dml.DoubleMLData.from_arrays(x=data_dict["X"], y=data_dict["Y"], d=data_dict["D"], s=data_dict["score"]) + >>> obj_dml_data = dml.DoubleMLRDDData.from_arrays(x=data_dict["X"], y=data_dict["Y"], d=data_dict["D"], s=data_dict["score"]) >>> ml_g = RandomForestRegressor() >>> ml_m = RandomForestClassifier() >>> rdflex_obj = dml.rdd.RDFlex(obj_dml_data, ml_g, ml_m, fuzzy=True) @@ -482,21 +482,21 @@ def _initialize_arrays(self): return M_Y, M_D, h, rdd_obj, all_coef, all_se, all_ci def _check_data(self, obj_dml_data, cutoff): - if not isinstance(obj_dml_data, DoubleMLData): + if not isinstance(obj_dml_data, DoubleMLRDDData): raise TypeError( - f"The data must be of DoubleMLData type. {str(obj_dml_data)} of type {str(type(obj_dml_data))} was passed." + f"The data must be of DoubleMLRDDData type. 
{str(obj_dml_data)} of type {str(type(obj_dml_data))} was passed." ) # score checks - if obj_dml_data.s_col is None: + if obj_dml_data.score_col is None: raise ValueError("Incompatible data. " + "Score variable has not been set. ") - is_continuous = type_of_target(obj_dml_data.s) == "continuous" + is_continuous = type_of_target(obj_dml_data.score) == "continuous" if not is_continuous: raise ValueError("Incompatible data. " + "Score variable has to be continuous. ") if not isinstance(cutoff, (int, float)): raise TypeError(f"Cutoff value has to be a float or int. Object of type {str(type(cutoff))} passed.") - if not (obj_dml_data.s.min() <= cutoff <= obj_dml_data.s.max()): + if not (obj_dml_data.score.min() <= cutoff <= obj_dml_data.score.max()): raise ValueError("Cutoff value is not within the range of the score variable. ") # treatment checks diff --git a/doubleml/tests/test_datasets.py b/doubleml/tests/test_datasets.py index b31091a6..f69b681e 100644 --- a/doubleml/tests/test_datasets.py +++ b/doubleml/tests/test_datasets.py @@ -2,7 +2,7 @@ import pandas as pd import pytest -from doubleml import DoubleMLClusterData, DoubleMLData +from doubleml import DoubleMLData from doubleml.datasets import fetch_401K, fetch_bonus from doubleml.irm.datasets import ( make_confounded_irm_data, diff --git a/doubleml/tests/test_exceptions.py b/doubleml/tests/test_exceptions.py index 5178adc6..7839d7c4 100644 --- a/doubleml/tests/test_exceptions.py +++ b/doubleml/tests/test_exceptions.py @@ -54,7 +54,7 @@ @pytest.mark.ci def test_doubleml_exception_data(): - msg = "The data must be of DoubleMLData or DoubleMLClusterData type." + msg = "The data must be of DoubleMLData or DoubleMLClusterData or DoubleMLDIDData or DoubleMLSSMData or DoubleMLRDDData type." with pytest.raises(TypeError, match=msg): _ = DoubleMLPLR(pd.DataFrame(), ml_l, ml_m) @@ -1351,13 +1351,14 @@ def test_doubleml_cluster_not_yet_implemented(): df = dml_cluster_data_pliv.data.copy() df["cluster_var_k"] = df["cluster_var_i"] + df["cluster_var_j"] - 2 - dml_cluster_data_multiway = DoubleMLClusterData( + dml_cluster_data_multiway = DoubleMLData( df, y_col="Y", d_cols="D", x_cols=["X1", "X5"], z_cols="Z", cluster_cols=["cluster_var_i", "cluster_var_j", "cluster_var_k"], + is_cluster_data=True, ) assert dml_cluster_data_multiway.n_cluster_vars == 3 msg = r"Multi-way \(n_ways > 2\) clustering not yet implemented." 
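The test adjustments in these patches follow a common migration pattern for the refactored data backends: the deprecated DoubleMLClusterData wrapper is replaced by DoubleMLData with is_cluster_data=True, selection models move to the dedicated DoubleMLSSMData backend, and RDFlex expects DoubleMLRDDData with an explicit score column. A minimal sketch of that pattern, assuming the constructor signatures used in the adjusted tests of this branch (toy data and column names below are purely illustrative, not part of the test suite):

    import numpy as np
    import pandas as pd
    import doubleml as dml

    # toy data, for illustration only
    rng = np.random.default_rng(3141)
    n = 100
    x = rng.normal(size=(n, 3))
    y = rng.normal(size=n)
    d = rng.binomial(1, 0.5, size=n)
    s = rng.binomial(1, 0.5, size=n)  # selection indicator for the SSM backend
    df = pd.DataFrame(x, columns=["x1", "x2", "x3"])
    df["y"], df["d"] = y, d
    df["score"] = rng.normal(size=n)  # running variable for the RDD backend
    df["cluster_var"] = rng.integers(0, 10, size=n)

    # clustered data: flag on DoubleMLData instead of the deprecated DoubleMLClusterData
    cluster_data = dml.DoubleMLData(
        df, y_col="y", d_cols="d", x_cols=["x1", "x2", "x3"],
        cluster_cols="cluster_var", is_cluster_data=True,
    )

    # sample-selection models use the dedicated SSM backend
    ssm_data = dml.DoubleMLSSMData.from_arrays(x, y, d, z=None, s=s)

    # RDFlex expects the RDD backend with an explicit score column
    rdd_data = dml.DoubleMLRDDData(df, y_col="y", d_cols="d", score_col="score")
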
diff --git a/doubleml/tests/test_multiway_cluster.py b/doubleml/tests/test_multiway_cluster.py index 10e5d445..4537cb4d 100644 --- a/doubleml/tests/test_multiway_cluster.py +++ b/doubleml/tests/test_multiway_cluster.py @@ -288,7 +288,7 @@ def dml_plr_cluster_with_index(generate_data1, learner): dml_plr_obj.fit() df = data.reset_index() - dml_cluster_data = dml.DoubleMLClusterData(df, y_col="y", d_cols="d", x_cols=x_cols, cluster_cols="index") + dml_cluster_data = dml.DoubleMLData(df, y_col="y", d_cols="d", x_cols=x_cols, cluster_cols="index", is_cluster_data=True) np.random.seed(3141) dml_plr_cluster_obj = dml.DoubleMLPLR(dml_cluster_data, ml_l, ml_m, n_folds=n_folds) np.random.seed(3141) diff --git a/doubleml/tests/test_nonlinear_cluster.py b/doubleml/tests/test_nonlinear_cluster.py index 71998941..6f19b511 100644 --- a/doubleml/tests/test_nonlinear_cluster.py +++ b/doubleml/tests/test_nonlinear_cluster.py @@ -7,7 +7,7 @@ from sklearn.linear_model import Lasso, LinearRegression import doubleml as dml -from doubleml import DoubleMLClusterData +from doubleml import DoubleMLData from doubleml.plm.datasets import make_pliv_multiway_cluster_CKMS2021 from .test_nonlinear_score_mixin import DoubleMLPLRWithNonLinearScoreMixin @@ -20,7 +20,7 @@ # create data without insturment for plr x, y, d, cluster_vars, z = make_pliv_multiway_cluster_CKMS2021(N, M, dim_x, return_type="array") -obj_dml_cluster_data = DoubleMLClusterData.from_arrays(x, y, d, cluster_vars) +obj_dml_cluster_data = DoubleMLData.from_arrays(x, y, d, cluster_vars, is_cluster_data=True) x, y, d, cluster_vars, z = make_pliv_multiway_cluster_CKMS2021( N, @@ -32,7 +32,7 @@ omega_V=np.array([0.25, 0]), return_type="array", ) -obj_dml_oneway_cluster_data = DoubleMLClusterData.from_arrays(x, y, d, cluster_vars) +obj_dml_oneway_cluster_data = DoubleMLData.from_arrays(x, y, d, cluster_vars, is_cluster_data=True) # only the first cluster variable is relevant with the weight setting above obj_dml_oneway_cluster_data.cluster_cols = "cluster_var1" @@ -196,7 +196,7 @@ def dml_plr_cluster_nonlinear_with_index(generate_data1, learner): dml_plr_obj.fit() df = data.reset_index() - dml_cluster_data = dml.DoubleMLClusterData(df, y_col="y", d_cols="d", x_cols=x_cols, cluster_cols="index") + dml_cluster_data = dml.DoubleMLData(df, y_col="y", d_cols="d", x_cols=x_cols, cluster_cols="index", is_cluster_data=True) np.random.seed(3141) dml_plr_cluster_obj = DoubleMLPLRWithNonLinearScoreMixin(dml_cluster_data, ml_l, ml_m, n_folds=n_folds) dml_plr_cluster_obj.fit() diff --git a/doubleml/tests/test_return_types.py b/doubleml/tests/test_return_types.py index 03676b74..fdb680f3 100644 --- a/doubleml/tests/test_return_types.py +++ b/doubleml/tests/test_return_types.py @@ -8,7 +8,7 @@ from doubleml import ( DoubleMLAPO, - DoubleMLClusterData, + DoubleMLData, DoubleMLCVAR, DoubleMLData, DoubleMLDID, @@ -86,14 +86,14 @@ def test_return_types(dml_obj, cls): if not dml_obj._is_cluster_data: assert isinstance(dml_obj.set_sample_splitting(dml_obj.smpls), cls) else: - assert isinstance(dml_obj._dml_data, DoubleMLClusterData) + assert dml_obj._dml_data.is_cluster_data assert isinstance(dml_obj.fit(), cls) assert isinstance(dml_obj.__str__(), str) # called again after fit, now with numbers assert isinstance(dml_obj.summary, pd.DataFrame) # called again after fit, now with numbers if not dml_obj._is_cluster_data: assert isinstance(dml_obj.bootstrap(), cls) else: - assert isinstance(dml_obj._dml_data, DoubleMLClusterData) + assert dml_obj._dml_data.is_cluster_data assert 
isinstance(dml_obj.confint(), pd.DataFrame)
     if not dml_obj._is_cluster_data:
         assert isinstance(dml_obj.p_adjust(), pd.DataFrame)
diff --git a/doubleml/tests/test_sensitivity_cluster.py b/doubleml/tests/test_sensitivity_cluster.py
index 83f8c270..5b6a7f1e 100644
--- a/doubleml/tests/test_sensitivity_cluster.py
+++ b/doubleml/tests/test_sensitivity_cluster.py
@@ -17,7 +17,7 @@
 
 (x, y, d, cluster_vars, z) = make_pliv_multiway_cluster_CKMS2021(N, M, dim_x, return_type="array")
 
-obj_dml_cluster_data = dml.DoubleMLClusterData.from_arrays(x, y, d, cluster_vars)
+obj_dml_cluster_data = dml.DoubleMLData.from_arrays(x, y, d, cluster_vars, is_cluster_data=True)
 
 (x, y, d, cluster_vars, z) = make_pliv_multiway_cluster_CKMS2021(
     N,
@@ -29,7 +29,7 @@
     omega_V=np.array([0.25, 0]),
     return_type="array",
 )
-obj_dml_oneway_cluster_data = dml.DoubleMLClusterData.from_arrays(x, y, d, cluster_vars)
+obj_dml_oneway_cluster_data = dml.DoubleMLData.from_arrays(x, y, d, cluster_vars, is_cluster_data=True)
 # only the first cluster variable is relevant with the weight setting above
 obj_dml_oneway_cluster_data.cluster_cols = "cluster_var1"
diff --git a/doubleml/utils/_check_return_types.py b/doubleml/utils/_check_return_types.py
index 54462059..5d93873e 100644
--- a/doubleml/utils/_check_return_types.py
+++ b/doubleml/utils/_check_return_types.py
@@ -3,7 +3,6 @@
 import plotly
 
 from doubleml import DoubleMLFramework
-from doubleml.data import DoubleMLClusterData
 from doubleml.double_ml_score_mixins import NonLinearScoreMixin
 
 
@@ -15,14 +14,14 @@ def check_basic_return_types(dml_obj, cls):
     if not dml_obj._is_cluster_data:
         assert isinstance(dml_obj.set_sample_splitting(dml_obj.smpls), cls)
     else:
-        assert isinstance(dml_obj._dml_data, DoubleMLClusterData)
+        assert dml_obj._dml_data.is_cluster_data
     assert isinstance(dml_obj.fit(), cls)
     assert isinstance(dml_obj.__str__(), str)  # called again after fit, now with numbers
     assert isinstance(dml_obj.summary, pd.DataFrame)  # called again after fit, now with numbers
     if not dml_obj._is_cluster_data:
         assert isinstance(dml_obj.bootstrap(), cls)
     else:
-        assert isinstance(dml_obj._dml_data, DoubleMLClusterData)
+        assert dml_obj._dml_data.is_cluster_data
     assert isinstance(dml_obj.confint(), pd.DataFrame)
     if not dml_obj._is_cluster_data:
         assert isinstance(dml_obj.p_adjust(), pd.DataFrame)

From b9bdf7c302b165d41429368996e60b5df15ffe0f Mon Sep 17 00:00:00 2001
From: Jan Teichert-Kluge
Date: Tue, 17 Jun 2025 14:51:02 +0200
Subject: [PATCH 75/84] add check for correct data backend

---
 doubleml/did/did_cs.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/doubleml/did/did_cs.py b/doubleml/did/did_cs.py
index 7f33210f..7dab27ed 100644
--- a/doubleml/did/did_cs.py
+++ b/doubleml/did/did_cs.py
@@ -4,7 +4,7 @@
 from sklearn.utils import check_X_y
 from sklearn.utils.multiclass import type_of_target
 
-from doubleml.data.base_data import DoubleMLData
+from doubleml.data.did_data import DoubleMLDIDData
 from doubleml.double_ml import DoubleML
 from doubleml.double_ml_score_mixins import LinearScoreMixin
 from doubleml.utils._checks import _check_finite_predictions, _check_is_propensity, _check_score, _check_trimming
@@ -177,9 +177,9 @@ def _initialize_ml_nuisance_params(self):
         self._params = {learner: {key: [None] * self.n_rep for key in self._dml_data.d_cols} for learner in valid_learner}
 
     def _check_data(self, obj_dml_data):
-        if not isinstance(obj_dml_data, DoubleMLData):
+        if not isinstance(obj_dml_data, DoubleMLDIDData):
             raise TypeError(
-                "For repeated cross sections the data must 
be of DoubleMLData type. " + "For repeated cross sections the data must be of DoubleMLDIDData type. " f"{str(obj_dml_data)} of type {str(type(obj_dml_data))} was passed." ) if obj_dml_data.z_cols is not None: From 4f70523525731456a4148a72dd8d4b9b7b0a4e0c Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Tue, 17 Jun 2025 14:51:17 +0200 Subject: [PATCH 76/84] renaming after refactoring --- doubleml/rdd/rdd.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doubleml/rdd/rdd.py b/doubleml/rdd/rdd.py index 565f0241..195fbba4 100644 --- a/doubleml/rdd/rdd.py +++ b/doubleml/rdd/rdd.py @@ -115,7 +115,7 @@ def __init__( self._check_data(obj_dml_data, cutoff) self._dml_data = obj_dml_data - self._score = self._dml_data.s - cutoff + self._score = self._dml_data.score - cutoff self._cutoff = cutoff self._intendend_treatment = (self._score >= 0).astype(bool) self._fuzzy = fuzzy From 19eab819b7bc4504a008ecef967a586a9f8b1df9 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Tue, 17 Jun 2025 14:51:41 +0200 Subject: [PATCH 77/84] adjust dummy data (is_cluster_data flag) --- doubleml/tests/_utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doubleml/tests/_utils.py b/doubleml/tests/_utils.py index a241b58a..577ed7ed 100644 --- a/doubleml/tests/_utils.py +++ b/doubleml/tests/_utils.py @@ -9,8 +9,9 @@ class DummyDataClass(DoubleMLBaseData): - def __init__(self, data): + def __init__(self, data, is_cluster_data=False): DoubleMLBaseData.__init__(self, data) + self.is_cluster_data = is_cluster_data @property def n_coefs(self): From c3fbbb8f00a9ae54ccd44175d2fa1a16d7c5c3b3 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Tue, 17 Jun 2025 14:51:47 +0200 Subject: [PATCH 78/84] adjust unit tests --- doubleml/rdd/tests/conftest.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doubleml/rdd/tests/conftest.py b/doubleml/rdd/tests/conftest.py index b279ea93..75c9272b 100644 --- a/doubleml/rdd/tests/conftest.py +++ b/doubleml/rdd/tests/conftest.py @@ -3,7 +3,7 @@ import pytest from sklearn.dummy import DummyClassifier, DummyRegressor -from doubleml import DoubleMLData +from doubleml import DoubleMLRDDData from doubleml.rdd import RDFlex from doubleml.rdd._utils import _is_rdrobust_available from doubleml.rdd.datasets import make_simple_rdd_data @@ -24,7 +24,7 @@ def predict_dummy(): - make predictions using rdrobust as a reference """ - def _predict_dummy(data: DoubleMLData, cutoff, alpha, n_rep, p, fs_specification, ml_g=ml_g_dummy): + def _predict_dummy(data: DoubleMLRDDData, cutoff, alpha, n_rep, p, fs_specification, ml_g=ml_g_dummy): dml_rdflex = RDFlex( data, ml_g=ml_g, ml_m=ml_m_dummy, cutoff=cutoff, n_rep=n_rep, p=p, fs_specification=fs_specification ) @@ -81,7 +81,7 @@ def generate_data(n_obs: int, fuzzy: str, cutoff: float, binary_outcome: bool = columns = ["y", "d", "score"] + ["x" + str(i) for i in range(data["X"].shape[1])] df = pd.DataFrame(np.column_stack((data["Y"], data["D"], data["score"], data["X"])), columns=columns) - return DoubleMLData(df, y_col="y", d_cols="d", s_col="score") + return DoubleMLRDDData(df, y_col="y", d_cols="d", score_col="score") @pytest.fixture(scope="module") From 144ee607f43c93a6c1165e3b904414147e6d53e5 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Tue, 17 Jun 2025 15:50:56 +0200 Subject: [PATCH 79/84] adjust t_col setter for DIDData Backend --- doubleml/data/base_data.py | 3 ++- doubleml/data/did_data.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git 
a/doubleml/data/base_data.py b/doubleml/data/base_data.py index 2297944e..88cf5379 100644 --- a/doubleml/data/base_data.py +++ b/doubleml/data/base_data.py @@ -263,7 +263,8 @@ def from_arrays( Default is ``True``. Examples - -------- >>> from doubleml import DoubleMLData + -------- + >>> from doubleml import DoubleMLData >>> from doubleml.plm.datasets import make_plr_CCDDHNR2018 >>> (x, y, d) = make_plr_CCDDHNR2018(return_type='array') >>> obj_dml_data_from_array = DoubleMLData.from_arrays(x, y, d) diff --git a/doubleml/data/did_data.py b/doubleml/data/did_data.py index fd4fc7de..414cdc5b 100644 --- a/doubleml/data/did_data.py +++ b/doubleml/data/did_data.py @@ -300,7 +300,7 @@ def _check_disjoint_sets_t_col(self): def _set_time_var(self): """Set the time variable array.""" if hasattr(self, "_data") and self.t_col in self.data.columns: - self._t = self.data.loc[:, [self.t_col]] + self._t = self.data.loc[:, self.t_col] def _set_y_z_t(self): def _set_attr(col): From 70d67ad539a7a15fac8dbb968cf4c8d445c49e9d Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Tue, 17 Jun 2025 16:15:31 +0200 Subject: [PATCH 80/84] fix RDDData (finally...) --- doubleml/data/rdd_data.py | 2 +- doubleml/rdd/tests/conftest.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doubleml/data/rdd_data.py b/doubleml/data/rdd_data.py index f19a4fa0..16f9e1c0 100644 --- a/doubleml/data/rdd_data.py +++ b/doubleml/data/rdd_data.py @@ -261,7 +261,7 @@ def _check_disjoint_sets_score_col(self): def _set_score_var(self): """Set the score variable array.""" if hasattr(self, "_data") and self.score_col in self.data.columns: - self._score = self.data.loc[:, [self.score_col]] + self._score = self.data.loc[:, self.score_col] def __str__(self): """String representation.""" diff --git a/doubleml/rdd/tests/conftest.py b/doubleml/rdd/tests/conftest.py index 75c9272b..9d13deaf 100644 --- a/doubleml/rdd/tests/conftest.py +++ b/doubleml/rdd/tests/conftest.py @@ -35,7 +35,7 @@ def _predict_dummy(data: DoubleMLRDDData, cutoff, alpha, n_rep, p, fs_specificat msg = "rdrobust is not installed. Please install it using 'pip install DoubleML[rdd]'" raise ImportError(msg) - rdrobust_model = rdrobust.rdrobust(y=data.y, x=data.s, c=cutoff, level=100 * (1 - alpha), p=p) + rdrobust_model = rdrobust.rdrobust(y=data.y, x=data.score, c=cutoff, level=100 * (1 - alpha), p=p) reference = { "model": rdrobust_model, From a322e359d5dc2e257fbb35c65f7e976569d337d7 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Tue, 17 Jun 2025 16:31:26 +0200 Subject: [PATCH 81/84] adjust RDD Class --- doubleml/rdd/rdd.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doubleml/rdd/rdd.py b/doubleml/rdd/rdd.py index 195fbba4..045789c3 100644 --- a/doubleml/rdd/rdd.py +++ b/doubleml/rdd/rdd.py @@ -22,8 +22,8 @@ class RDFlex: Parameters ---------- - obj_dml_data : :class:`DoubleMLData` object - The :class:`DoubleMLData` object providing the data and specifying the variables for the causal model. + obj_dml_data : :class:`DoubleMLRDDData` object + The :class:`DoubleMLRDDData` object providing the data and specifying the variables for the causal model. ml_g : estimator implementing ``fit()`` and ``predict()`` A machine learner implementing ``fit()`` and ``predict()`` methods and support ``sample_weights`` (e.g.
From 0a9b3c7e32948aff252dc51a972c90425bdb521d Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Tue, 17 Jun 2025 16:31:43 +0200 Subject: [PATCH 82/84] adjust DID classes --- doubleml/did/did.py | 12 ++++++------ doubleml/did/did_cs.py | 6 +++--- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/doubleml/did/did.py b/doubleml/did/did.py index 170535ea..580d805e 100644 --- a/doubleml/did/did.py +++ b/doubleml/did/did.py @@ -4,7 +4,7 @@ from sklearn.utils import check_X_y from sklearn.utils.multiclass import type_of_target -from doubleml.data.base_data import DoubleMLData +from doubleml.data.did_data import DoubleMLDIDData from doubleml.double_ml import DoubleML from doubleml.double_ml_score_mixins import LinearScoreMixin from doubleml.utils._checks import _check_finite_predictions, _check_is_propensity, _check_score, _check_trimming @@ -17,8 +17,8 @@ class DoubleMLDID(LinearScoreMixin, DoubleML): Parameters ---------- - obj_dml_data : :class:`DoubleMLData` object - The :class:`DoubleMLData` object providing the data and specifying the variables for the causal model. + obj_dml_data : :class:`DoubleMLDIDData` object + The :class:`DoubleMLDIDData` object providing the data and specifying the variables for the causal model. ml_g : estimator implementing ``fit()`` and ``predict()`` A machine learner implementing ``fit()`` and ``predict()`` methods (e.g. @@ -71,7 +71,7 @@ class DoubleMLDID(LinearScoreMixin, DoubleML): >>> ml_g = RandomForestRegressor(n_estimators=100, max_depth=5, min_samples_leaf=5) >>> ml_m = RandomForestClassifier(n_estimators=100, max_depth=5, min_samples_leaf=5) >>> data = make_did_SZ2020(n_obs=500, return_type='DataFrame') - >>> obj_dml_data = dml.DoubleMLData(data, 'y', 'd') + >>> obj_dml_data = dml.DoubleMLDIDData(data, 'y', 'd') >>> dml_did_obj = dml.DoubleMLDID(obj_dml_data, ml_g, ml_m) >>> dml_did_obj.fit().summary coef std err t P>|t| 2.5 % 97.5 % @@ -176,9 +176,9 @@ def _initialize_ml_nuisance_params(self): self._params = {learner: {key: [None] * self.n_rep for key in self._dml_data.d_cols} for learner in valid_learner} def _check_data(self, obj_dml_data): - if not isinstance(obj_dml_data, DoubleMLData): + if not isinstance(obj_dml_data, DoubleMLDIDData): raise TypeError( - "For repeated outcomes the data must be of DoubleMLData type. " + "For repeated outcomes the data must be of DoubleMLDIDData type. " f"{str(obj_dml_data)} of type {str(type(obj_dml_data))} was passed." ) if obj_dml_data.z_cols is not None: diff --git a/doubleml/did/did_cs.py b/doubleml/did/did_cs.py index bd7d59dd..38cc4952 100644 --- a/doubleml/did/did_cs.py +++ b/doubleml/did/did_cs.py @@ -17,8 +17,8 @@ class DoubleMLDIDCS(LinearScoreMixin, DoubleML): Parameters ---------- - obj_dml_data : :class:`DoubleMLData` object - The :class:`DoubleMLData` object providing the data and specifying the variables for the causal model. + obj_dml_data : :class:`DoubleMLDIDData` object + The :class:`DoubleMLDIDData` object providing the data and specifying the variables for the causal model. ml_g : estimator implementing ``fit()`` and ``predict()`` A machine learner implementing ``fit()`` and ``predict()`` methods (e.g. 
@@ -71,7 +71,7 @@ class DoubleMLDIDCS(LinearScoreMixin, DoubleML): >>> ml_g = RandomForestRegressor(n_estimators=100, max_depth=5, min_samples_leaf=5) >>> ml_m = RandomForestClassifier(n_estimators=100, max_depth=5, min_samples_leaf=5) >>> data = make_did_SZ2020(n_obs=500, cross_sectional_data=True, return_type='DataFrame') - >>> obj_dml_data = dml.DoubleMLData(data, 'y', 'd', t_col='t') + >>> obj_dml_data = dml.DoubleMLDIDData(data, 'y', 'd', t_col='t') >>> dml_did_obj = dml.DoubleMLDIDCS(obj_dml_data, ml_g, ml_m) >>> dml_did_obj.fit().summary coef std err t P>|t| 2.5 % 97.5 % From 37f11dced954198a6e455f1b8a182ec08c3a28a3 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Tue, 17 Jun 2025 16:32:08 +0200 Subject: [PATCH 83/84] Adjust unit tests for DID --- doubleml/did/tests/test_did.py | 4 ++-- doubleml/did/tests/test_did_binary_exceptions.py | 2 +- doubleml/did/tests/test_did_binary_tune.py | 2 +- doubleml/did/tests/test_did_binary_vs_did_panel.py | 2 +- doubleml/did/tests/test_did_binary_vs_did_two_period.py | 2 +- doubleml/did/tests/test_did_cs.py | 4 ++-- doubleml/did/tests/test_did_cs_binary_exceptions.py | 2 +- doubleml/did/tests/test_did_cs_binary_tune.py | 2 +- doubleml/did/tests/test_did_cs_binary_vs_did_cs_panel.py | 2 +- .../did/tests/test_did_cs_binary_vs_did_cs_two_period.py | 2 +- doubleml/did/tests/test_did_tune.py | 2 +- doubleml/did/tests/test_return_types.py | 6 +++--- 12 files changed, 16 insertions(+), 16 deletions(-) diff --git a/doubleml/did/tests/test_did.py b/doubleml/did/tests/test_did.py index 90d53a95..79feb110 100644 --- a/doubleml/did/tests/test_did.py +++ b/doubleml/did/tests/test_did.py @@ -57,7 +57,7 @@ def dml_did_fixture(generate_data_did, learner, score, in_sample_normalization, np.random.seed(3141) n_obs = len(y) all_smpls = draw_smpls(n_obs, n_folds, n_rep=1, groups=d) - obj_dml_data = dml.DoubleMLData.from_arrays(x, y, d) + obj_dml_data = dml.DoubleMLDIDData.from_arrays(x, y, d) np.random.seed(3141) dml_did_obj = dml.DoubleMLDID( @@ -182,7 +182,7 @@ def test_dml_did_experimental(generate_data_did, in_sample_normalization, learne ml_m = clone(learner[1]) np.random.seed(3141) - obj_dml_data = dml.DoubleMLData.from_arrays(x, y, d) + obj_dml_data = dml.DoubleMLDIDData.from_arrays(x, y, d) np.random.seed(3141) dml_did_obj_without_ml_m = dml.DoubleMLDID( diff --git a/doubleml/did/tests/test_did_binary_exceptions.py b/doubleml/did/tests/test_did_binary_exceptions.py index c7aa2395..78c09a94 100644 --- a/doubleml/did/tests/test_did_binary_exceptions.py +++ b/doubleml/did/tests/test_did_binary_exceptions.py @@ -85,7 +85,7 @@ def test_check_data_exceptions(): # Test 1: Data has to be DoubleMLPanelData invalid_data_types = [ - dml.data.DoubleMLData(df, y_col="Col_0", d_cols="Col_1"), + dml.data.DoubleMLDIDData(df, y_col="Col_0", d_cols="Col_1"), ] for invalid_data in invalid_data_types: diff --git a/doubleml/did/tests/test_did_binary_tune.py b/doubleml/did/tests/test_did_binary_tune.py index a817223f..0962aa5b 100644 --- a/doubleml/did/tests/test_did_binary_tune.py +++ b/doubleml/did/tests/test_did_binary_tune.py @@ -64,7 +64,7 @@ def dml_did_fixture(generate_data_did_binary, learner_g, learner_m, score, in_sa n_obs = df_panel.shape[0] all_smpls = draw_smpls(n_obs, n_folds, n_rep=1, groups=df_panel["d"]) - obj_dml_data = dml.DoubleMLData(df_panel, y_col="y", d_cols="d", x_cols=["Z1", "Z2", "Z3", "Z4"]) + obj_dml_data = dml.DoubleMLDIDData(df_panel, y_col="y", d_cols="d", x_cols=["Z1", "Z2", "Z3", "Z4"]) # Set machine learning methods for m & g ml_g = 
clone(learner_g) diff --git a/doubleml/did/tests/test_did_binary_vs_did_panel.py b/doubleml/did/tests/test_did_binary_vs_did_panel.py index 426b413c..2eddccaf 100644 --- a/doubleml/did/tests/test_did_binary_vs_did_panel.py +++ b/doubleml/did/tests/test_did_binary_vs_did_panel.py @@ -79,7 +79,7 @@ def dml_did_binary_vs_did_fixture(time_type, learner, score, in_sample_normaliza dml_did_binary_obj.fit() df_wide = dml_did_binary_obj.data_subset.copy() - dml_data = dml.data.DoubleMLData(df_wide, y_col="y_diff", d_cols="G_indicator", x_cols=["Z1", "Z2", "Z3", "Z4"]) + dml_data = dml.data.DoubleMLDIDData(df_wide, y_col="y_diff", d_cols="G_indicator", x_cols=["Z1", "Z2", "Z3", "Z4"]) dml_did_obj = dml.DoubleMLDID( dml_data, **dml_args, diff --git a/doubleml/did/tests/test_did_binary_vs_did_two_period.py b/doubleml/did/tests/test_did_binary_vs_did_two_period.py index 0db2a752..74575664 100644 --- a/doubleml/did/tests/test_did_binary_vs_did_two_period.py +++ b/doubleml/did/tests/test_did_binary_vs_did_two_period.py @@ -56,7 +56,7 @@ def dml_did_binary_vs_did_fixture(generate_data_did_binary, learner, score, in_s n_obs = df_panel.shape[0] all_smpls = draw_smpls(n_obs, n_folds) - obj_dml_data = dml.DoubleMLData(df_panel, y_col="y", d_cols="d", x_cols=["Z1", "Z2", "Z3", "Z4"]) + obj_dml_data = dml.DoubleMLDIDData(df_panel, y_col="y", d_cols="d", x_cols=["Z1", "Z2", "Z3", "Z4"]) # Set machine learning methods for m & g ml_g = clone(learner[0]) diff --git a/doubleml/did/tests/test_did_cs.py b/doubleml/did/tests/test_did_cs.py index ae633588..bc8e2da6 100644 --- a/doubleml/did/tests/test_did_cs.py +++ b/doubleml/did/tests/test_did_cs.py @@ -59,7 +59,7 @@ def dml_did_cs_fixture(generate_data_did_cs, learner, score, in_sample_normaliza n_obs = len(y) all_smpls = draw_smpls(n_obs, n_folds, n_rep=1, groups=d + 2 * t) - obj_dml_data = dml.DoubleMLData.from_arrays(x, y, d, t=t) + obj_dml_data = dml.DoubleMLDIDData.from_arrays(x, y, d, t=t) np.random.seed(3141) dml_did_cs_obj = dml.DoubleMLDIDCS( @@ -185,7 +185,7 @@ def test_dml_did_cs_experimental(generate_data_did_cs, in_sample_normalization, ml_m = clone(learner[1]) np.random.seed(3141) - obj_dml_data = dml.DoubleMLData.from_arrays(x, y, d, t=t) + obj_dml_data = dml.DoubleMLDIDData.from_arrays(x, y, d, t=t) np.random.seed(3141) dml_did_obj_without_ml_m = dml.DoubleMLDIDCS( diff --git a/doubleml/did/tests/test_did_cs_binary_exceptions.py b/doubleml/did/tests/test_did_cs_binary_exceptions.py index b506da2d..e8d33939 100644 --- a/doubleml/did/tests/test_did_cs_binary_exceptions.py +++ b/doubleml/did/tests/test_did_cs_binary_exceptions.py @@ -85,7 +85,7 @@ def test_check_data_exceptions(): # Test 1: Data has to be DoubleMLPanelData invalid_data_types = [ - dml.data.DoubleMLData(df, y_col="Col_0", d_cols="Col_1"), + dml.data.DoubleMLDIDData(df, y_col="Col_0", d_cols="Col_1"), ] for invalid_data in invalid_data_types: diff --git a/doubleml/did/tests/test_did_cs_binary_tune.py b/doubleml/did/tests/test_did_cs_binary_tune.py index 0bd2c6ab..59db23dd 100644 --- a/doubleml/did/tests/test_did_cs_binary_tune.py +++ b/doubleml/did/tests/test_did_cs_binary_tune.py @@ -63,7 +63,7 @@ def dml_did_fixture(generate_data_did_binary, learner_g, learner_m, score, in_sa dml_panel_data = dml.data.DoubleMLPanelData( df, y_col="y", d_cols="d", id_col="id", t_col="t", x_cols=["Z1", "Z2", "Z3", "Z4"] ) - obj_dml_data = dml.DoubleMLData(df, y_col="y", d_cols="d", t_col="t", x_cols=["Z1", "Z2", "Z3", "Z4"]) + obj_dml_data = dml.DoubleMLDIDData(df, y_col="y", d_cols="d", t_col="t", 
x_cols=["Z1", "Z2", "Z3", "Z4"]) n_obs = df.shape[0] strata = df["d"] + 2 * df["t"] # only valid since it values are binary diff --git a/doubleml/did/tests/test_did_cs_binary_vs_did_cs_panel.py b/doubleml/did/tests/test_did_cs_binary_vs_did_cs_panel.py index 8fab2615..da7db085 100644 --- a/doubleml/did/tests/test_did_cs_binary_vs_did_cs_panel.py +++ b/doubleml/did/tests/test_did_cs_binary_vs_did_cs_panel.py @@ -76,7 +76,7 @@ def dml_did_binary_vs_did_fixture(time_type, learner, score, in_sample_normaliza dml_did_binary_obj.fit() df_subset = dml_did_binary_obj.data_subset.copy() - dml_data = dml.data.DoubleMLData( + dml_data = dml.data.DoubleMLDIDData( df_subset, y_col="y", d_cols="G_indicator", x_cols=["Z1", "Z2", "Z3", "Z4"], t_col="t_indicator" ) dml_did_obj = dml.DoubleMLDIDCS( diff --git a/doubleml/did/tests/test_did_cs_binary_vs_did_cs_two_period.py b/doubleml/did/tests/test_did_cs_binary_vs_did_cs_two_period.py index 73e6b827..b9e267ce 100644 --- a/doubleml/did/tests/test_did_cs_binary_vs_did_cs_two_period.py +++ b/doubleml/did/tests/test_did_cs_binary_vs_did_cs_two_period.py @@ -55,7 +55,7 @@ def dml_did_cs_binary_vs_did_cs_fixture(generate_data_did_binary, learner, score dml_panel_data = dml.data.DoubleMLPanelData( df, y_col="y", d_cols="d", id_col="id", t_col="t", x_cols=["Z1", "Z2", "Z3", "Z4"] ) - obj_dml_data = dml.DoubleMLData(df, y_col="y", d_cols="d", t_col="t", x_cols=["Z1", "Z2", "Z3", "Z4"]) + obj_dml_data = dml.DoubleMLDIDData(df, y_col="y", d_cols="d", t_col="t", x_cols=["Z1", "Z2", "Z3", "Z4"]) n_obs = df.shape[0] all_smpls = draw_smpls(n_obs, n_folds) diff --git a/doubleml/did/tests/test_did_tune.py b/doubleml/did/tests/test_did_tune.py index 16ec2ee8..25899301 100644 --- a/doubleml/did/tests/test_did_tune.py +++ b/doubleml/did/tests/test_did_tune.py @@ -65,7 +65,7 @@ def dml_did_fixture(generate_data_did, learner_g, learner_m, score, in_sample_no ml_m = clone(learner_m) np.random.seed(3141) - obj_dml_data = dml.DoubleMLData.from_arrays(x, y, d) + obj_dml_data = dml.DoubleMLDIDData.from_arrays(x, y, d) dml_did_obj = dml.DoubleMLDID( obj_dml_data, ml_g, diff --git a/doubleml/did/tests/test_return_types.py b/doubleml/did/tests/test_return_types.py index 37105c3e..531a9706 100644 --- a/doubleml/did/tests/test_return_types.py +++ b/doubleml/did/tests/test_return_types.py @@ -3,7 +3,7 @@ import pytest from sklearn.linear_model import Lasso, LogisticRegression -from doubleml.data import DoubleMLData, DoubleMLPanelData +from doubleml.data import DoubleMLDIDData, DoubleMLPanelData from doubleml.did import DoubleMLDID, DoubleMLDIDBinary, DoubleMLDIDCS, DoubleMLDIDCSBinary from doubleml.did.datasets import make_did_CS2021, make_did_cs_CS2021, make_did_SZ2020 from doubleml.utils._check_return_types import ( @@ -37,8 +37,8 @@ (x, y, d, t) = make_did_SZ2020(n_obs=N_OBS, cross_sectional_data=True, return_type="array") binary_outcome = np.random.binomial(n=1, p=0.5, size=N_OBS) -datasets["did_binary_outcome"] = DoubleMLData.from_arrays(x, binary_outcome, d) -datasets["did_cs_binary_outcome"] = DoubleMLData.from_arrays(x, binary_outcome, d, t=t) +datasets["did_binary_outcome"] = DoubleMLDIDData.from_arrays(x, binary_outcome, d) +datasets["did_cs_binary_outcome"] = DoubleMLDIDData.from_arrays(x, binary_outcome, d, t=t) dml_objs = [ (DoubleMLDID(datasets["did"], Lasso(), LogisticRegression(), **dml_args), DoubleMLDID), From 7be2d8f84a67fb2bfae1b33fc09583d0eb3d27da Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Tue, 17 Jun 2025 16:32:18 +0200 Subject: [PATCH 84/84] Adjust RDD 
unit tests --- doubleml/rdd/tests/test_rdd_exceptions.py | 8 ++++---- doubleml/rdd/tests/test_rdd_return_types.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/doubleml/rdd/tests/test_rdd_exceptions.py b/doubleml/rdd/tests/test_rdd_exceptions.py index 6abf901e..71670793 100644 --- a/doubleml/rdd/tests/test_rdd_exceptions.py +++ b/doubleml/rdd/tests/test_rdd_exceptions.py @@ -6,7 +6,7 @@ from sklearn.base import BaseEstimator, ClassifierMixin, RegressorMixin from sklearn.linear_model import Lasso, LogisticRegression -from doubleml import DoubleMLData +from doubleml import DoubleMLRDDData from doubleml.rdd import RDFlex from doubleml.rdd.datasets import make_simple_rdd_data @@ -17,7 +17,7 @@ columns=["y", "d", "score"] + ["x" + str(i) for i in range(data["X"].shape[1])], ) -dml_data = DoubleMLData(df, y_col="y", d_cols="d", s_col="score") +dml_data = DoubleMLRDDData(df, y_col="y", d_cols="d", s_col="score") ml_g = Lasso() ml_m = LogisticRegression() @@ -58,8 +58,8 @@ def predict_proba(self, X): @pytest.mark.ci_rdd def test_rdd_exception_data(): - # DoubleMLData - msg = r"The data must be of DoubleMLData type. \[\] of type was passed." + # DoubleMLRDDData + msg = r"The data must be of DoubleMLRDDData type. \[\] of type was passed." with pytest.raises(TypeError, match=msg): _ = RDFlex([], ml_g) diff --git a/doubleml/rdd/tests/test_rdd_return_types.py b/doubleml/rdd/tests/test_rdd_return_types.py index 13248afd..56f2bfe4 100644 --- a/doubleml/rdd/tests/test_rdd_return_types.py +++ b/doubleml/rdd/tests/test_rdd_return_types.py @@ -15,7 +15,7 @@ np.column_stack((data["Y"], data["D"], data["score"], data["X"])), columns=["y", "d", "score"] + ["x" + str(i) for i in range(data["X"].shape[1])], ) -dml_data = dml.DoubleMLData(df, y_col="y", d_cols="d", s_col="score") +dml_data = dml.DoubleMLRDDData(df, y_col="y", d_cols="d", s_col="score") def _assert_return_types(dml_obj):
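# --- Usage sketch (hedged): how the refactored data backends touched by these patches fit
# together. It only reuses constructors and keyword arguments that appear in the "+" lines
# of the diffs above (DoubleMLDIDData.from_arrays, DoubleMLRDDData with score_col,
# DoubleMLData with is_cluster_data); the toy RDD DataFrame and all numeric values are
# illustrative assumptions rather than code taken from the patch series.
import numpy as np
import pandas as pd

import doubleml as dml
from doubleml.did.datasets import make_did_SZ2020

# DID backend: DoubleMLDIDData replaces DoubleMLData for the DID models.
(x, y, d, t) = make_did_SZ2020(n_obs=500, cross_sectional_data=True, return_type="array")
did_data = dml.DoubleMLDIDData.from_arrays(x, y, d, t=t)

# RDD backend: DoubleMLRDDData with score_col replaces DoubleMLData with s_col.
rng = np.random.default_rng(42)
n = 200
score = rng.normal(size=n)
df_rdd = pd.DataFrame(
    {
        "y": 1.0 + 0.5 * (score >= 0) + score + rng.normal(size=n),  # toy outcome with a jump at the cutoff
        "d": (score >= 0).astype(int),  # sharp treatment assignment at cutoff 0
        "score": score,
        "x0": rng.normal(size=n),
    }
)
rdd_data = dml.DoubleMLRDDData(df_rdd, y_col="y", d_cols="d", score_col="score")

# Cluster data: the former DoubleMLClusterData is expressed through the is_cluster_data
# flag, e.g. (with x, y, d, cluster_vars generated as in the cluster tests above):
# cluster_data = dml.DoubleMLData.from_arrays(x, y, d, cluster_vars, is_cluster_data=True)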