From a572a3a7b6e89c65e3eeb24fd715cf3e95889190 Mon Sep 17 00:00:00 2001
From: R-Palazzo <romainpalazzo@gmail.com>
Date: Wed, 25 Jun 2025 14:53:15 +0100
Subject: [PATCH 01/14] deprecate parameters warning

---
 sdgym/benchmark.py           | 28 ++++++++++++++++++++++++++
 tests/unit/test_benchmark.py | 38 ++++++++++++++++++++++++++++++++++--
 2 files changed, 64 insertions(+), 2 deletions(-)

diff --git a/sdgym/benchmark.py b/sdgym/benchmark.py
index 8614078b..a0c0292e 100644
--- a/sdgym/benchmark.py
+++ b/sdgym/benchmark.py
@@ -779,6 +779,31 @@ def _create_instance_on_ec2(script_content):
     print(f'Job kicked off for SDGym on {instance_id}')  # noqa
 
 
+def _handle_deprecated_parameters(
+    output_filepath, detailed_results_folder, multi_processing_config, run_on_ec2
+):
+    """Handle deprecated parameters and issue warnings."""
+    parameters_to_deprecate = {
+        'output_filepath': output_filepath,
+        'detailed_results_folder': detailed_results_folder,
+        'multi_processing_config': multi_processing_config,
+        'run_on_ec2': run_on_ec2,
+    }
+    parameters = []
+    for name, value in parameters_to_deprecate.items():
+        if value is not None and value:
+            parameters.append(name)
+
+    if parameters:
+        parameters = "', '".join(sorted(parameters))
+        message = (
+            f"Parameters '{parameters}' are deprecated in the `benchmark_single_table` "
+            'function and will be removed in October 2025. '
+            'Please consider using `output_destination` instead.'
+        )
+        warnings.warn(message, FutureWarning)
+
+
 def benchmark_single_table(
     synthesizers=DEFAULT_SYNTHESIZERS,
     custom_synthesizers=None,
@@ -863,6 +888,9 @@ def benchmark_single_table(
         pandas.DataFrame:
             A table containing one row per synthesizer + dataset + metric.
     """
+    _handle_deprecated_parameters(
+        output_filepath, detailed_results_folder, multi_processing_config, run_on_ec2
+    )
     if run_on_ec2:
         print("This will create an instance for the current AWS user's account.")  # noqa
         if output_filepath is not None:
diff --git a/tests/unit/test_benchmark.py b/tests/unit/test_benchmark.py
index 10b98d12..24fd47b0 100644
--- a/tests/unit/test_benchmark.py
+++ b/tests/unit/test_benchmark.py
@@ -1,3 +1,4 @@
+import re
 from unittest.mock import ANY, MagicMock, patch
 
 import pandas as pd
@@ -9,6 +10,7 @@
     _create_sdgym_script,
     _directory_exists,
     _format_output,
+    _handle_deprecated_parameters,
 )
 from sdgym.synthesizers import GaussianCopulaSynthesizer
 
@@ -33,7 +35,8 @@ def test_output_file_exists(path_mock):
 
 
 @patch('sdgym.benchmark.tqdm.tqdm')
-def test_progress_bar_updates(tqdm_mock):
+@patch('sdgym.benchmark._handle_deprecated_parameters')
+def test_benchmark_single_table_deprecated_params(mock_handle_deprecated, tqdm_mock):
     """Test that the benchmarking function updates the progress bar on one line."""
     # Setup
     scores_mock = MagicMock()
@@ -48,9 +51,11 @@ def test_progress_bar_updates(tqdm_mock):
     )
 
     # Assert
+    mock_handle_deprecated.assert_called_once_with(
+        None, None, None, False
+    )
     tqdm_mock.assert_called_once_with(ANY, total=1, position=0, leave=True)
 
-
 @patch('sdgym.benchmark._score')
 @patch('sdgym.benchmark.multiprocessing')
 def test_benchmark_single_table_with_timeout(mock_multiprocessing, mock__score):
@@ -301,3 +306,32 @@ def test__format_output():
         'NewMetric': [0.998],
     })
     pd.testing.assert_frame_equal(scores, expected_scores)
+
+
+def test__handle_deprecated_parameters():
+    """Test the ``_handle_deprecated_parameters`` function."""
+    # Setup
+    output_filepath = 's3://BucketName/path'
+    detailed_results_folder = 'mock/path'
+    multi_processing_config = {'num_processes': 4}
+    run_on_ec2 = True
+    expected_message_1 = re.escape(
+        "Parameters 'detailed_results_folder', 'output_filepath' are deprecated in the "
+        '`benchmark_single_table` function and will be removed in October 2025. Please '
+        'consider using `output_destination` instead.'
+    )
+    expected_message_2 = re.escape(
+        "Parameters 'detailed_results_folder', 'multi_processing_config', 'output_filepath'"
+        ", 'run_on_ec2' are deprecated in the `benchmark_single_table` function and will be"
+        ' removed in October 2025. Please consider using `output_destination` instead.'
+    )
+
+    # Run and Assert
+    _handle_deprecated_parameters(None, None, None, False)
+    with pytest.warns(FutureWarning, match=expected_message_1):
+        _handle_deprecated_parameters(output_filepath, detailed_results_folder, None, False)
+
+    with pytest.warns(FutureWarning, match=expected_message_2):
+        _handle_deprecated_parameters(
+            output_filepath, detailed_results_folder, multi_processing_config, run_on_ec2
+        )

From 8e967cc68015962fed95d5b6e7f571dc75f2f29d Mon Sep 17 00:00:00 2001
From: R-Palazzo <romainpalazzo@gmail.com>
Date: Wed, 25 Jun 2025 16:55:01 +0100
Subject: [PATCH 02/14] define _setup_output_destination

---
 sdgym/benchmark.py           | 78 +++++++++++++++++++++++++++++++++
 tests/unit/test_benchmark.py | 83 ++++++++++++++++++++++++++++++++++--
 2 files changed, 158 insertions(+), 3 deletions(-)

diff --git a/sdgym/benchmark.py b/sdgym/benchmark.py
index a0c0292e..feaaac06 100644
--- a/sdgym/benchmark.py
+++ b/sdgym/benchmark.py
@@ -8,7 +8,9 @@
 import pickle
 import tracemalloc
 import warnings
+from collections import defaultdict
 from contextlib import contextmanager
+from datetime import datetime
 from pathlib import Path
 
 import boto3
@@ -804,6 +806,74 @@ def _handle_deprecated_parameters(
         warnings.warn(message, FutureWarning)
 
 
+def _validate_output_destination(output_destination):
+    if not isinstance(output_destination, str):
+        raise ValueError(
+            'The `output_destination` parameter must be a string representing the output path.'
+        )
+
+    if is_s3_path(output_destination):
+        raise ValueError(
+            'The `output_destination` parameter cannot be an S3 path. '
+            'Please use `benchmark_single_table_aws` instead.'
+        )
+
+    if os.path.exists(output_destination):
+        raise ValueError(f'The output path {output_destination} already exists.')
+
+
+def _setup_output_destination(
+    output_destination, synthesizers, custom_synthesizers, sdv_datasets, additional_datasets_folder
+):
+    """Set up the output destination for the benchmark results.
+
+    Args:
+        output_destination (str or None):
+            The path to the output directory where results will be saved.
+            If None, no output will be saved.
+        synthesizers (list):
+            The list of synthesizers to benchmark.
+        custom_synthesizers (list):
+            The list of custom synthesizers to benchmark.
+        sdv_datasets (list):
+            The list of SDV datasets to benchmark.
+        additional_datasets_folder (str or None):
+            The path to a folder containing additional datasets.
+    """
+    if output_destination is None:
+        return None
+
+    _validate_output_destination(output_destination)
+    os.makedirs(output_destination, exist_ok=True)
+    today = datetime.today().strftime('%m_%d_%Y')
+    top_folder = os.path.join(output_destination, f'SDGym_results_{today}')
+    os.makedirs(top_folder, exist_ok=True)
+    all_synthesizers = synthesizers + custom_synthesizers
+    all_datasets = sdv_datasets.copy()
+    if additional_datasets_folder:
+        additional_datasets = [
+            f'additional_datasets_folder/{name.split(".")[0]}'
+            for name in os.listdir(additional_datasets_folder)
+            if name.endswith('.csv')
+        ]
+        all_datasets.extend(additional_datasets)
+
+    paths = defaultdict(dict)
+    for dataset in all_datasets:
+        dataset_folder_name = f'{dataset}_{today}'
+        dataset_folder_path = os.path.join(top_folder, dataset_folder_name)
+        os.makedirs(dataset_folder_path, exist_ok=True)
+        paths[dataset]['meta'] = os.path.join(dataset_folder_path, 'meta.yaml')
+        for synth_name in all_synthesizers:
+            synth_folder_path = os.path.join(dataset_folder_path, synth_name)
+            os.makedirs(synth_folder_path, exist_ok=True)
+            synth_file = os.path.join(synth_folder_path, 'synthesizer.pkl')
+            data_file = os.path.join(synth_folder_path, 'synthetic_data.csv')
+            paths[dataset][synth_name] = {'synthesizer': synth_file, 'synthetic_data': data_file}
+
+    return paths
+
+
 def benchmark_single_table(
     synthesizers=DEFAULT_SYNTHESIZERS,
     custom_synthesizers=None,
@@ -815,6 +885,7 @@ def benchmark_single_table(
     compute_privacy_score=True,
     sdmetrics=None,
     timeout=None,
+    output_destination=None,
     output_filepath=None,
     detailed_results_folder=None,
     show_progress=False,
@@ -891,6 +962,13 @@ def benchmark_single_table(
     _handle_deprecated_parameters(
         output_filepath, detailed_results_folder, multi_processing_config, run_on_ec2
     )
+    paths = _setup_output_destination(
+        output_destination,
+        synthesizers,
+        custom_synthesizers,
+        sdv_datasets,
+        additional_datasets_folder,
+    )
     if run_on_ec2:
         print("This will create an instance for the current AWS user's account.")  # noqa
         if output_filepath is not None:
diff --git a/tests/unit/test_benchmark.py b/tests/unit/test_benchmark.py
index 24fd47b0..5278d679 100644
--- a/tests/unit/test_benchmark.py
+++ b/tests/unit/test_benchmark.py
@@ -1,4 +1,6 @@
+import json
 import re
+from datetime import datetime
 from unittest.mock import ANY, MagicMock, patch
 
 import pandas as pd
@@ -11,6 +13,8 @@
     _directory_exists,
     _format_output,
     _handle_deprecated_parameters,
+    _setup_output_destination,
+    _validate_output_destination,
 )
 from sdgym.synthesizers import GaussianCopulaSynthesizer
 
@@ -51,11 +55,10 @@ def test_benchmark_single_table_deprecated_params(mock_handle_deprecated, tqdm_m
     )
 
     # Assert
-    mock_handle_deprecated.assert_called_once_with(
-        None, None, None, False
-    )
+    mock_handle_deprecated.assert_called_once_with(None, None, None, False)
     tqdm_mock.assert_called_once_with(ANY, total=1, position=0, leave=True)
 
+
 @patch('sdgym.benchmark._score')
 @patch('sdgym.benchmark.multiprocessing')
 def test_benchmark_single_table_with_timeout(mock_multiprocessing, mock__score):
@@ -335,3 +338,77 @@ def test__handle_deprecated_parameters():
         _handle_deprecated_parameters(
             output_filepath, detailed_results_folder, multi_processing_config, run_on_ec2
         )
+
+
+def test__validate_output_destination(tmp_path):
+    """Test the `_validate_output_destination` function."""
+    # Setup
+    wrong_type = 12345
+    aws_destination = 's3://valid-bucket/path/to/file'
+    valid_destination = tmp_path / 'valid-destination'
+    err_1 = re.escape(
+        'The `output_destination` parameter must be a string representing the output path.'
+    )
+    err_2 = re.escape(
+        'The `output_destination` parameter cannot be an S3 path. '
+        'Please use `benchmark_single_table_aws` instead.'
+    )
+    err_3 = re.escape(f'The output path {valid_destination} already exists.')
+
+    # Run and Assert
+    _validate_output_destination(str(valid_destination))
+    with pytest.raises(ValueError, match=err_1):
+        _validate_output_destination(wrong_type)
+
+    with pytest.raises(ValueError, match=err_2):
+        _validate_output_destination(aws_destination)
+
+    valid_destination.mkdir()
+    with pytest.raises(ValueError, match=err_3):
+        _validate_output_destination(str(valid_destination))
+
+
+@patch('sdgym.benchmark._validate_output_destination')
+def test__setup_output_destination(mock_validate, tmp_path):
+    """Test the `_setup_output_destination` function."""
+    # Setup
+    output_destination = tmp_path / 'output_destination'
+    synthesizers = ['GaussianCopulaSynthesizer', 'CTGANSynthesizer']
+    customsynthesizers = ['CustomSynthesizer']
+    datasets = ['adult', 'census']
+    additional_datasets_folder = tmp_path / 'additional_datasets'
+    additional_datasets_folder.mkdir()
+    additional_data = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
+    additional_data.to_csv(additional_datasets_folder / 'additional_data.csv', index=False)
+    today = datetime.today().strftime('%m_%d_%Y')
+    base_path = output_destination / f'SDGym_results_{today}'
+
+    # Run
+    result_1 = _setup_output_destination(
+        None, synthesizers, customsynthesizers, datasets, additional_datasets_folder
+    )
+    result_2 = _setup_output_destination(
+        output_destination, synthesizers, customsynthesizers, datasets, additional_datasets_folder
+    )
+
+    # Assert
+    expected = {
+        dataset: {
+            'meta': str(base_path / f'{dataset}_{today}' / 'meta.yaml'),
+            **{
+                synth: {
+                    'synthesizer': str(
+                        base_path / f'{dataset}_{today}' / synth / 'synthesizer.pkl'
+                    ),
+                    'synthetic_data': str(
+                        base_path / f'{dataset}_{today}' / synth / 'synthetic_data.csv'
+                    ),
+                }
+                for synth in synthesizers + customsynthesizers
+            },
+        }
+        for dataset in datasets + ['additional_datasets_folder/additional_data']
+    }
+    assert result_1 is None
+    mock_validate.assert_called_once_with(output_destination)
+    assert json.loads(json.dumps(result_2)) == expected

From cdf3fceafd3f5b08e613b4766d57efd1df7eda8e Mon Sep 17 00:00:00 2001
From: R-Palazzo <romainpalazzo@gmail.com>
Date: Thu, 26 Jun 2025 11:49:14 +0100
Subject: [PATCH 03/14] define run_id.yaml file

---
 sdgym/benchmark.py           | 65 ++++++++++++++++++++++++++++++++++--
 tests/unit/test_benchmark.py | 58 ++++++++++++++++++++++++++++++++
 2 files changed, 121 insertions(+), 2 deletions(-)

diff --git a/sdgym/benchmark.py b/sdgym/benchmark.py
index feaaac06..542d4526 100644
--- a/sdgym/benchmark.py
+++ b/sdgym/benchmark.py
@@ -7,10 +7,12 @@
 import os
 import pickle
 import tracemalloc
+import uuid
 import warnings
 from collections import defaultdict
 from contextlib import contextmanager
 from datetime import datetime
+from importlib.metadata import version
 from pathlib import Path
 
 import boto3
@@ -19,6 +21,7 @@
 import numpy as np
 import pandas as pd
 import tqdm
+import yaml
 from sdmetrics.reports.multi_table import (
     DiagnosticReport as MultiTableDiagnosticReport,
 )
@@ -65,6 +68,15 @@
     'covtype',
 ]
 N_BYTES_IN_MB = 1000 * 1000
+EXTERNAL_SYNTHESIZER_TO_LIBRARY = {
+    'RealTabFormerSynthesizer': 'realtabformer',
+}
+SDV_SINGLE_TABLE_SYNTHESIZERS = [
+    'GaussianCopulaSynthesizer',
+    'CTGANSynthesizer',
+    'CopulaGANSynthesizer',
+    'TVAESynthesizer',
+]
 
 
 def _validate_inputs(output_filepath, detailed_results_folder, synthesizers, custom_synthesizers):
@@ -151,7 +163,7 @@ def _generate_job_args_list(
     return job_args_list
 
 
-def _synthesize(synthesizer_dict, real_data, metadata):
+def _synthesize(synthesizer_dict, real_data, metadata, path=None):
     synthesizer = synthesizer_dict['synthesizer']
     if isinstance(synthesizer, type):
         assert issubclass(synthesizer, BaselineSynthesizer), (
@@ -874,6 +886,39 @@ def _setup_output_destination(
     return paths
 
 
+def _write_run_id_file(output_destination, synthesizers, job_args_list):
+    run_id = str(uuid.uuid4())[:8]
+    metadata = {
+        'run_id': run_id,
+        'starting_date': datetime.today().strftime('%m_%d_%Y %H:%M:%S'),
+        'completed_date': None,
+        'sdgym_version': version('sdgym'),
+        'jobs': job_args_list,
+    }
+    for synthesizer in synthesizers:
+        if synthesizer not in SDV_SINGLE_TABLE_SYNTHESIZERS:
+            ext_lib = EXTERNAL_SYNTHESIZER_TO_LIBRARY[synthesizer]
+            library_version = version(ext_lib)
+            metadata[f'{ext_lib}_version'] = library_version
+        elif 'sdv' not in metadata.keys():
+            metadata['sdv_version'] = version('sdv')
+
+    with open(f'{output_destination}/run_{run_id}.yaml', 'w') as file:
+        yaml.dump(metadata, file)
+
+    return run_id
+
+
+def _update_run_id_file(output_destination, run_id):
+    run_file = Path(output_destination) / f'run_{run_id}.yaml'
+    with open(run_file, 'r') as f:
+        run_data = yaml.safe_load(f) or {}
+
+    run_data['completed_date'] = datetime.today().strftime('%m_%d_%Y %H:%M:%S')
+    with open(run_file, 'w') as f:
+        yaml.dump(run_data, f)
+
+
 def benchmark_single_table(
     synthesizers=DEFAULT_SYNTHESIZERS,
     custom_synthesizers=None,
@@ -933,6 +978,18 @@ def benchmark_single_table(
         timeout (int or ``None``):
             The maximum number of seconds to wait for synthetic data creation. If ``None``, no
             timeout is enforced.
+        output_destination (str or ``None``):
+            The path to the output directory where results will be saved. If ``None``, no
+            output is saved. The results are saved with the following structure:
+            output_destination/
+                run_<id>.yaml
+                SDGym_results_<date>/
+                    results.csv
+                    <dataset_name>_<date>/
+                    meta.yaml
+                    <synthesizer_name>/
+                        synthesizer.pkl
+                        synthetic_data.csv
         output_filepath (str or ``None``):
             A file path for where to write the output as a csv file. If ``None``, no output
             is written. If run_on_ec2 flag output_filepath needs to be defined and
@@ -981,7 +1038,6 @@ def benchmark_single_table(
     _validate_inputs(output_filepath, detailed_results_folder, synthesizers, custom_synthesizers)
 
     _create_detailed_results_directory(detailed_results_folder)
-
     job_args_list = _generate_job_args_list(
         limit_dataset_size,
         sdv_datasets,
@@ -995,6 +1051,8 @@ def benchmark_single_table(
         synthesizers,
         custom_synthesizers,
     )
+    if paths is not None:
+        run_id = _write_run_id_file(output_destination, synthesizers, job_args_list)
 
     if job_args_list:
         scores = _run_jobs(multi_processing_config, job_args_list, show_progress)
@@ -1011,4 +1069,7 @@ def benchmark_single_table(
     if output_filepath:
         write_csv(scores, output_filepath, None, None)
 
+    if paths is not None:
+        _update_run_id_file(output_destination, run_id)
+
     return scores
diff --git a/tests/unit/test_benchmark.py b/tests/unit/test_benchmark.py
index 5278d679..372ca62e 100644
--- a/tests/unit/test_benchmark.py
+++ b/tests/unit/test_benchmark.py
@@ -1,10 +1,12 @@
 import json
 import re
 from datetime import datetime
+from importlib.metadata import version
 from unittest.mock import ANY, MagicMock, patch
 
 import pandas as pd
 import pytest
+import yaml
 
 from sdgym import benchmark_single_table
 from sdgym.benchmark import (
@@ -14,7 +16,9 @@
     _format_output,
     _handle_deprecated_parameters,
     _setup_output_destination,
+    _update_run_id_file,
     _validate_output_destination,
+    _write_run_id_file,
 )
 from sdgym.synthesizers import GaussianCopulaSynthesizer
 
@@ -412,3 +416,57 @@ def test__setup_output_destination(mock_validate, tmp_path):
     assert result_1 is None
     mock_validate.assert_called_once_with(output_destination)
     assert json.loads(json.dumps(result_2)) == expected
+
+
+@patch('sdgym.benchmark.uuid.uuid4')
+@patch('sdgym.benchmark.datetime')
+def test__write_run_id_file(mock_datetime, mock_uuid, tmp_path):
+    """Test the `_write_run_id_file` method."""
+    # Setup
+    output_destination = tmp_path / 'output_destination'
+    output_destination.mkdir()
+    mock_uuid.return_value = '123456789999'
+    mock_datetime.today.return_value.strftime.return_value = '06_26_2025'
+    jobs = ['job1', 'job2']
+    synthesizers = ['GaussianCopulaSynthesizer', 'CTGANSynthesizer', 'RealTabFormerSynthesizer']
+
+    # Run
+    run_id = _write_run_id_file(output_destination, synthesizers, jobs)
+
+    # Assert
+    assert run_id == '12345678'
+    run_id_file = output_destination / 'run_12345678.yaml'
+    assert run_id_file.exists()
+    with open(run_id_file, 'r') as file:
+        run_id_data = yaml.safe_load(file)
+        assert run_id_data['run_id'] == '12345678'
+        assert run_id_data['starting_date'] == '06_26_2025'
+        assert run_id_data['jobs'] == jobs
+        assert run_id_data['sdgym_version'] == version('sdgym')
+        assert run_id_data['sdv_version'] == version('sdv')
+        assert run_id_data['realtabformer_version'] == version('realtabformer')
+        assert run_id_data['completed_date'] is None
+
+
+@patch('sdgym.benchmark.datetime')
+def test__update_run_id_file(mock_datetime, tmp_path):
+    """Test the `_update_run_id_file` method."""
+    # Setup
+    output_destination = tmp_path / 'output_destination'
+    output_destination.mkdir()
+    run_id = '12345678'
+    mock_datetime.today.return_value.strftime.return_value = '06_26_2025'
+    metadata = {'run_id': run_id, 'starting_date': '06_25_2025', 'completed_date': None}
+    run_id_file = output_destination / f'run_{run_id}.yaml'
+    with open(run_id_file, 'w') as file:
+        yaml.dump(metadata, file)
+
+    # Run
+    _update_run_id_file(output_destination, run_id)
+
+    # Assert
+    with open(run_id_file, 'r') as file:
+        run_id_data = yaml.safe_load(file)
+        assert run_id_data['completed_date'] == '06_26_2025'
+        assert run_id_data['starting_date'] == '06_25_2025'
+        assert run_id_data['run_id'] == run_id

From 6f6b0bcb5f52d7f7d7cb8410813907afa1b3971a Mon Sep 17 00:00:00 2001
From: R-Palazzo <romainpalazzo@gmail.com>
Date: Thu, 26 Jun 2025 16:07:09 +0100
Subject: [PATCH 04/14] save synthetic_data and synthesizer

---
 sdgym/benchmark.py           | 129 +++++++++++++++++------------------
 tests/unit/test_benchmark.py |  27 +++-----
 2 files changed, 73 insertions(+), 83 deletions(-)

diff --git a/sdgym/benchmark.py b/sdgym/benchmark.py
index 542d4526..ff0c570d 100644
--- a/sdgym/benchmark.py
+++ b/sdgym/benchmark.py
@@ -107,6 +107,46 @@ def _create_detailed_results_directory(detailed_results_folder):
         os.makedirs(detailed_results_folder, exist_ok=True)
 
 
+def _setup_output_destination(output_destination, synthesizers, datasets):
+    """Set up the output destination for the benchmark results.
+
+    Args:
+        output_destination (str or None):
+            The path to the output directory where results will be saved.
+            If None, no output will be saved.
+        synthesizers (list):
+            The list of synthesizers to benchmark.
+        datasets (list):
+            The list of datasets to benchmark.
+    """
+    if output_destination is None:
+        return {}
+
+    _validate_output_destination(output_destination)
+    output_path = Path(output_destination)
+    output_path.mkdir(parents=True, exist_ok=True)
+    today = datetime.today().strftime('%m_%d_%Y')
+    top_folder = output_path / f'SDGym_results_{today}'
+    top_folder.mkdir(parents=True, exist_ok=True)
+
+    paths = defaultdict(dict)
+    for dataset in datasets:
+        dataset_folder = top_folder / f'{dataset}_{today}'
+        dataset_folder.mkdir(parents=True, exist_ok=True)
+        paths[dataset]['meta'] = str(dataset_folder / 'meta.yaml')
+
+        for synth_name in synthesizers:
+            synth_folder = dataset_folder / synth_name
+            synth_folder.mkdir(parents=True, exist_ok=True)
+
+            paths[dataset][synth_name] = {
+                'synthesizer': str(synth_folder / 'synthesizer.pkl'),
+                'synthetic_data': str(synth_folder / 'synthetic_data.csv'),
+            }
+
+    return paths
+
+
 def _generate_job_args_list(
     limit_dataset_size,
     sdv_datasets,
@@ -114,6 +154,7 @@ def _generate_job_args_list(
     sdmetrics,
     detailed_results_folder,
     timeout,
+    output_destination,
     compute_quality_score,
     compute_diagnostic_score,
     compute_privacy_score,
@@ -133,7 +174,9 @@ def _generate_job_args_list(
         else get_dataset_paths(bucket=additional_datasets_folder)
     )
     datasets = sdv_datasets + additional_datasets
-
+    synthesizer_names = [synthesizer['name'] for synthesizer in synthesizers]
+    dataset_names = [dataset.name for dataset in datasets]
+    paths = _setup_output_destination(output_destination, synthesizer_names, dataset_names)
     job_tuples = []
     for dataset in datasets:
         for synthesizer in synthesizers:
@@ -144,7 +187,7 @@ def _generate_job_args_list(
         data, metadata_dict = load_dataset(
             'single_table', dataset, limit_dataset_size=limit_dataset_size
         )
-
+        path = paths.get(dataset.name, {}).get(synthesizer['name'], None)
         args = (
             synthesizer,
             data,
@@ -157,6 +200,7 @@ def _generate_job_args_list(
             compute_privacy_score,
             dataset.name,
             'single_table',
+            path,
         )
         job_args_list.append(args)
 
@@ -191,6 +235,10 @@ def _synthesize(synthesizer_dict, real_data, metadata, path=None):
     peak_memory = tracemalloc.get_traced_memory()[1] / N_BYTES_IN_MB
     tracemalloc.stop()
     tracemalloc.clear_traces()
+    if path is not None:
+        synthetic_data.to_csv(path['synthetic_data'], index=False)
+        with open(path['synthesizer'], 'wb') as f:
+            pickle.dump(synthesizer_obj, f)
 
     return synthetic_data, train_now - now, sample_now - train_now, synthesizer_size, peak_memory
 
@@ -297,6 +345,7 @@ def _score(
     compute_privacy_score=False,
     modality=None,
     dataset_name=None,
+    path=None,
 ):
     if output is None:
         output = {}
@@ -316,7 +365,7 @@ def _score(
         # To be deleted if there is no error
         output['error'] = 'Synthesizer Timeout'
         synthetic_data, train_time, sample_time, synthesizer_size, peak_memory = _synthesize(
-            synthesizer, data.copy(), metadata
+            synthesizer, data.copy(), metadata, path
         )
 
         output['synthetic_data'] = synthetic_data
@@ -397,6 +446,7 @@ def _score_with_timeout(
     compute_privacy_score=False,
     modality=None,
     dataset_name=None,
+    path=None,
 ):
     with multiprocessing_context():
         with multiprocessing.Manager() as manager:
@@ -414,6 +464,7 @@ def _score_with_timeout(
                     compute_privacy_score,
                     modality,
                     dataset_name,
+                    path,
                 ),
             )
 
@@ -514,6 +565,7 @@ def _run_job(args):
         compute_privacy_score,
         dataset_name,
         modality,
+        path,
     ) = args
 
     name = synthesizer['name']
@@ -524,7 +576,6 @@ def _run_job(args):
         timeout,
         used_memory(),
     )
-
     output = {}
     try:
         if timeout:
@@ -539,6 +590,7 @@ def _run_job(args):
                 compute_privacy_score=compute_privacy_score,
                 modality=modality,
                 dataset_name=dataset_name,
+                path=path,
             )
         else:
             output = _score(
@@ -551,6 +603,7 @@ def _run_job(args):
                 compute_privacy_score=compute_privacy_score,
                 modality=modality,
                 dataset_name=dataset_name,
+                path=path,
             )
     except Exception as error:
         output['exception'] = error
@@ -834,66 +887,15 @@ def _validate_output_destination(output_destination):
         raise ValueError(f'The output path {output_destination} already exists.')
 
 
-def _setup_output_destination(
-    output_destination, synthesizers, custom_synthesizers, sdv_datasets, additional_datasets_folder
-):
-    """Set up the output destination for the benchmark results.
-
-    Args:
-        output_destination (str or None):
-            The path to the output directory where results will be saved.
-            If None, no output will be saved.
-        synthesizers (list):
-            The list of synthesizers to benchmark.
-        custom_synthesizers (list):
-            The list of custom synthesizers to benchmark.
-        sdv_datasets (list):
-            The list of SDV datasets to benchmark.
-        additional_datasets_folder (str or None):
-            The path to a folder containing additional datasets.
-    """
-    if output_destination is None:
-        return None
-
-    _validate_output_destination(output_destination)
-    os.makedirs(output_destination, exist_ok=True)
-    today = datetime.today().strftime('%m_%d_%Y')
-    top_folder = os.path.join(output_destination, f'SDGym_results_{today}')
-    os.makedirs(top_folder, exist_ok=True)
-    all_synthesizers = synthesizers + custom_synthesizers
-    all_datasets = sdv_datasets.copy()
-    if additional_datasets_folder:
-        additional_datasets = [
-            f'additional_datasets_folder/{name.split(".")[0]}'
-            for name in os.listdir(additional_datasets_folder)
-            if name.endswith('.csv')
-        ]
-        all_datasets.extend(additional_datasets)
-
-    paths = defaultdict(dict)
-    for dataset in all_datasets:
-        dataset_folder_name = f'{dataset}_{today}'
-        dataset_folder_path = os.path.join(top_folder, dataset_folder_name)
-        os.makedirs(dataset_folder_path, exist_ok=True)
-        paths[dataset]['meta'] = os.path.join(dataset_folder_path, 'meta.yaml')
-        for synth_name in all_synthesizers:
-            synth_folder_path = os.path.join(dataset_folder_path, synth_name)
-            os.makedirs(synth_folder_path, exist_ok=True)
-            synth_file = os.path.join(synth_folder_path, 'synthesizer.pkl')
-            data_file = os.path.join(synth_folder_path, 'synthetic_data.csv')
-            paths[dataset][synth_name] = {'synthesizer': synth_file, 'synthetic_data': data_file}
-
-    return paths
-
-
 def _write_run_id_file(output_destination, synthesizers, job_args_list):
+    jobs = [[job[0]['name'], job[-3]] for job in job_args_list]
     run_id = str(uuid.uuid4())[:8]
     metadata = {
         'run_id': run_id,
         'starting_date': datetime.today().strftime('%m_%d_%Y %H:%M:%S'),
         'completed_date': None,
         'sdgym_version': version('sdgym'),
-        'jobs': job_args_list,
+        'jobs': jobs,
     }
     for synthesizer in synthesizers:
         if synthesizer not in SDV_SINGLE_TABLE_SYNTHESIZERS:
@@ -1019,13 +1021,6 @@ def benchmark_single_table(
     _handle_deprecated_parameters(
         output_filepath, detailed_results_folder, multi_processing_config, run_on_ec2
     )
-    paths = _setup_output_destination(
-        output_destination,
-        synthesizers,
-        custom_synthesizers,
-        sdv_datasets,
-        additional_datasets_folder,
-    )
     if run_on_ec2:
         print("This will create an instance for the current AWS user's account.")  # noqa
         if output_filepath is not None:
@@ -1036,7 +1031,6 @@ def benchmark_single_table(
         return None
 
     _validate_inputs(output_filepath, detailed_results_folder, synthesizers, custom_synthesizers)
-
     _create_detailed_results_directory(detailed_results_folder)
     job_args_list = _generate_job_args_list(
         limit_dataset_size,
@@ -1045,13 +1039,14 @@ def benchmark_single_table(
         sdmetrics,
         detailed_results_folder,
         timeout,
+        output_destination,
         compute_quality_score,
         compute_diagnostic_score,
         compute_privacy_score,
         synthesizers,
         custom_synthesizers,
     )
-    if paths is not None:
+    if output_destination is not None:
         run_id = _write_run_id_file(output_destination, synthesizers, job_args_list)
 
     if job_args_list:
@@ -1069,7 +1064,7 @@ def benchmark_single_table(
     if output_filepath:
         write_csv(scores, output_filepath, None, None)
 
-    if paths is not None:
+    if output_destination is not None:
         _update_run_id_file(output_destination, run_id)
 
     return scores
diff --git a/tests/unit/test_benchmark.py b/tests/unit/test_benchmark.py
index 372ca62e..ac1c642d 100644
--- a/tests/unit/test_benchmark.py
+++ b/tests/unit/test_benchmark.py
@@ -378,22 +378,13 @@ def test__setup_output_destination(mock_validate, tmp_path):
     # Setup
     output_destination = tmp_path / 'output_destination'
     synthesizers = ['GaussianCopulaSynthesizer', 'CTGANSynthesizer']
-    customsynthesizers = ['CustomSynthesizer']
     datasets = ['adult', 'census']
-    additional_datasets_folder = tmp_path / 'additional_datasets'
-    additional_datasets_folder.mkdir()
-    additional_data = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
-    additional_data.to_csv(additional_datasets_folder / 'additional_data.csv', index=False)
     today = datetime.today().strftime('%m_%d_%Y')
     base_path = output_destination / f'SDGym_results_{today}'
 
     # Run
-    result_1 = _setup_output_destination(
-        None, synthesizers, customsynthesizers, datasets, additional_datasets_folder
-    )
-    result_2 = _setup_output_destination(
-        output_destination, synthesizers, customsynthesizers, datasets, additional_datasets_folder
-    )
+    result_1 = _setup_output_destination(None, synthesizers, datasets)
+    result_2 = _setup_output_destination(output_destination, synthesizers, datasets)
 
     # Assert
     expected = {
@@ -408,12 +399,12 @@ def test__setup_output_destination(mock_validate, tmp_path):
                         base_path / f'{dataset}_{today}' / synth / 'synthetic_data.csv'
                     ),
                 }
-                for synth in synthesizers + customsynthesizers
+                for synth in synthesizers
             },
         }
-        for dataset in datasets + ['additional_datasets_folder/additional_data']
+        for dataset in datasets
     }
-    assert result_1 is None
+    assert result_1 == {}
     mock_validate.assert_called_once_with(output_destination)
     assert json.loads(json.dumps(result_2)) == expected
 
@@ -427,7 +418,11 @@ def test__write_run_id_file(mock_datetime, mock_uuid, tmp_path):
     output_destination.mkdir()
     mock_uuid.return_value = '123456789999'
     mock_datetime.today.return_value.strftime.return_value = '06_26_2025'
-    jobs = ['job1', 'job2']
+    jobs = [
+        ({'name': 'GaussianCopulaSynthesizer'}, 'adult', None, None),
+        ({'name': 'CTGANSynthesizer'}, 'census', None, None),
+    ]
+    expected_jobs = [['GaussianCopulaSynthesizer', 'adult'], ['CTGANSynthesizer', 'census']]
     synthesizers = ['GaussianCopulaSynthesizer', 'CTGANSynthesizer', 'RealTabFormerSynthesizer']
 
     # Run
@@ -441,7 +436,7 @@ def test__write_run_id_file(mock_datetime, mock_uuid, tmp_path):
         run_id_data = yaml.safe_load(file)
         assert run_id_data['run_id'] == '12345678'
         assert run_id_data['starting_date'] == '06_26_2025'
-        assert run_id_data['jobs'] == jobs
+        assert run_id_data['jobs'] == expected_jobs
         assert run_id_data['sdgym_version'] == version('sdgym')
         assert run_id_data['sdv_version'] == version('sdv')
         assert run_id_data['realtabformer_version'] == version('realtabformer')

From 8a7037892063d22d1aa827fa158c118502b619f2 Mon Sep 17 00:00:00 2001
From: R-Palazzo <romainpalazzo@gmail.com>
Date: Thu, 26 Jun 2025 16:44:10 +0100
Subject: [PATCH 05/14] save final results

---
 sdgym/benchmark.py           | 33 +++++++++++++++++++++------------
 tests/unit/test_benchmark.py |  2 +-
 2 files changed, 22 insertions(+), 13 deletions(-)

diff --git a/sdgym/benchmark.py b/sdgym/benchmark.py
index ff0c570d..c638d2df 100644
--- a/sdgym/benchmark.py
+++ b/sdgym/benchmark.py
@@ -207,7 +207,7 @@ def _generate_job_args_list(
     return job_args_list
 
 
-def _synthesize(synthesizer_dict, real_data, metadata, path=None):
+def _synthesize(synthesizer_dict, real_data, metadata, synthesizer_path=None):
     synthesizer = synthesizer_dict['synthesizer']
     if isinstance(synthesizer, type):
         assert issubclass(synthesizer, BaselineSynthesizer), (
@@ -235,9 +235,9 @@ def _synthesize(synthesizer_dict, real_data, metadata, path=None):
     peak_memory = tracemalloc.get_traced_memory()[1] / N_BYTES_IN_MB
     tracemalloc.stop()
     tracemalloc.clear_traces()
-    if path is not None:
-        synthetic_data.to_csv(path['synthetic_data'], index=False)
-        with open(path['synthesizer'], 'wb') as f:
+    if synthesizer_path is not None:
+        synthetic_data.to_csv(synthesizer_path['synthetic_data'], index=False)
+        with open(synthesizer_path['synthesizer'], 'wb') as f:
             pickle.dump(synthesizer_obj, f)
 
     return synthetic_data, train_now - now, sample_now - train_now, synthesizer_size, peak_memory
@@ -345,7 +345,7 @@ def _score(
     compute_privacy_score=False,
     modality=None,
     dataset_name=None,
-    path=None,
+    synthesizer_path=None,
 ):
     if output is None:
         output = {}
@@ -365,7 +365,7 @@ def _score(
         # To be deleted if there is no error
         output['error'] = 'Synthesizer Timeout'
         synthetic_data, train_time, sample_time, synthesizer_size, peak_memory = _synthesize(
-            synthesizer, data.copy(), metadata, path
+            synthesizer, data.copy(), metadata, synthesizer_path=synthesizer_path
         )
 
         output['synthetic_data'] = synthetic_data
@@ -446,7 +446,7 @@ def _score_with_timeout(
     compute_privacy_score=False,
     modality=None,
     dataset_name=None,
-    path=None,
+    synthesizer_path=None,
 ):
     with multiprocessing_context():
         with multiprocessing.Manager() as manager:
@@ -464,7 +464,7 @@ def _score_with_timeout(
                     compute_privacy_score,
                     modality,
                     dataset_name,
-                    path,
+                    synthesizer_path,
                 ),
             )
 
@@ -565,7 +565,7 @@ def _run_job(args):
         compute_privacy_score,
         dataset_name,
         modality,
-        path,
+        synthesizer_path,
     ) = args
 
     name = synthesizer['name']
@@ -590,7 +590,7 @@ def _run_job(args):
                 compute_privacy_score=compute_privacy_score,
                 modality=modality,
                 dataset_name=dataset_name,
-                path=path,
+                synthesizer_path=synthesizer_path,
             )
         else:
             output = _score(
@@ -603,7 +603,7 @@ def _run_job(args):
                 compute_privacy_score=compute_privacy_score,
                 modality=modality,
                 dataset_name=dataset_name,
-                path=path,
+                synthesizer_path=synthesizer_path,
             )
     except Exception as error:
         output['exception'] = error
@@ -618,6 +618,15 @@ def _run_job(args):
         cache_dir,
     )
 
+    if synthesizer_path is not None:
+        synth_path = Path(synthesizer_path['synthesizer'])
+        root_path = synth_path.parents[2]
+        result_file = root_path / 'results.csv'
+        if not result_file.exists():
+            scores.to_csv(result_file, index=False, mode='w')
+        else:
+            scores.to_csv(result_file, index=False, mode='a', header=False)
+
     return scores
 
 
@@ -888,7 +897,7 @@ def _validate_output_destination(output_destination):
 
 
 def _write_run_id_file(output_destination, synthesizers, job_args_list):
-    jobs = [[job[0]['name'], job[-3]] for job in job_args_list]
+    jobs = [[job[-3], job[0]['name']] for job in job_args_list]
     run_id = str(uuid.uuid4())[:8]
     metadata = {
         'run_id': run_id,
diff --git a/tests/unit/test_benchmark.py b/tests/unit/test_benchmark.py
index ac1c642d..9ed5c144 100644
--- a/tests/unit/test_benchmark.py
+++ b/tests/unit/test_benchmark.py
@@ -422,7 +422,7 @@ def test__write_run_id_file(mock_datetime, mock_uuid, tmp_path):
         ({'name': 'GaussianCopulaSynthesizer'}, 'adult', None, None),
         ({'name': 'CTGANSynthesizer'}, 'census', None, None),
     ]
-    expected_jobs = [['GaussianCopulaSynthesizer', 'adult'], ['CTGANSynthesizer', 'census']]
+    expected_jobs = [['adult', 'GaussianCopulaSynthesizer'], ['census', 'CTGANSynthesizer']]
     synthesizers = ['GaussianCopulaSynthesizer', 'CTGANSynthesizer', 'RealTabFormerSynthesizer']
 
     # Run

From 98f6ce3891d5ee1d1aa59af19eeb87fc52f552e8 Mon Sep 17 00:00:00 2001
From: R-Palazzo <romainpalazzo@gmail.com>
Date: Thu, 26 Jun 2025 17:34:20 +0100
Subject: [PATCH 06/14] integration test

---
 tests/integration/test_benchmark.py | 43 +++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)

diff --git a/tests/integration/test_benchmark.py b/tests/integration/test_benchmark.py
index 8d52a993..2302da16 100644
--- a/tests/integration/test_benchmark.py
+++ b/tests/integration/test_benchmark.py
@@ -2,6 +2,7 @@
 
 import contextlib
 import io
+import os
 import re
 import sys
 import time
@@ -10,6 +11,7 @@
 import numpy as np
 import pandas as pd
 import pytest
+import yaml
 from sdv.metadata.single_table import SingleTableMetadata
 from sdv.single_table.copulas import GaussianCopulaSynthesizer
 
@@ -585,3 +587,44 @@ def test_benchmark_single_table_no_warnings():
         )
         future_warnings = [warning for warning in w if issubclass(warning.category, FutureWarning)]
         assert len(future_warnings) == 0
+
+
+def test_benchmark_single_table_with_output_destination(tmp_path):
+    """Test it works with the ``output_destination`` argument."""
+    # Setup
+    output_destination = str(tmp_path / 'benchmark_output')
+    today_date = pd.Timestamp.now().strftime('%m_%d_%Y')
+
+    # Run
+    results = benchmark_single_table(
+        synthesizers=['GaussianCopulaSynthesizer', 'TVAESynthesizer'],
+        sdv_datasets=['expedia_hotel_logs'],
+        output_destination=output_destination,
+    )
+
+    # Assert
+    directions = os.listdir(output_destination)
+    assert f'SDGym_results_{today_date}' in directions
+    for file in directions:
+        if file.endswith('.yaml'):
+            with open(os.path.join(output_destination, file), 'r') as f:
+                metadata = yaml.safe_load(f)
+                assert metadata['completed_date'] is not None
+                assert metadata['sdgym_version'] == sdgym.__version__
+        else:
+            subdirections = os.listdir(os.path.join(output_destination, file))
+            assert set(subdirections) == {'results.csv', f'expedia_hotel_logs_{today_date}'}
+            synthesizer_directions = os.listdir(
+                os.path.join(output_destination, file, f'expedia_hotel_logs_{today_date}')
+            )
+            assert set(synthesizer_directions) == {'TVAESynthesizer', 'GaussianCopulaSynthesizer'}
+            for synthesizer in synthesizer_directions:
+                synthesizer_files = os.listdir(
+                    os.path.join(
+                        output_destination, file, f'expedia_hotel_logs_{today_date}', synthesizer
+                    )
+                )
+                assert set(synthesizer_files) == {'synthesizer.pkl', 'synthetic_data.csv'}
+
+    saved_result = pd.read_csv(f'{output_destination}/SDGym_results_{today_date}/results.csv')
+    pd.testing.assert_frame_equal(results, saved_result, check_dtype=False)

From d83d96c10d5c4e58a390e9b53de6963e19507ec5 Mon Sep 17 00:00:00 2001
From: R-Palazzo <romainpalazzo@gmail.com>
Date: Fri, 27 Jun 2025 16:43:24 +0100
Subject: [PATCH 07/14] allow existing folder

---
 sdgym/benchmark.py                  |  3 --
 tests/integration/test_benchmark.py | 53 +++++++++++++++++++++++++++++
 tests/unit/test_benchmark.py        |  5 ---
 3 files changed, 53 insertions(+), 8 deletions(-)

diff --git a/sdgym/benchmark.py b/sdgym/benchmark.py
index c638d2df..ee90360b 100644
--- a/sdgym/benchmark.py
+++ b/sdgym/benchmark.py
@@ -892,9 +892,6 @@ def _validate_output_destination(output_destination):
             'Please use `benchmark_single_table_aws` instead.'
         )
 
-    if os.path.exists(output_destination):
-        raise ValueError(f'The output path {output_destination} already exists.')
-
 
 def _write_run_id_file(output_destination, synthesizers, job_args_list):
     jobs = [[job[-3], job[0]['name']] for job in job_args_list]
diff --git a/tests/integration/test_benchmark.py b/tests/integration/test_benchmark.py
index 2302da16..ff4e63f7 100644
--- a/tests/integration/test_benchmark.py
+++ b/tests/integration/test_benchmark.py
@@ -628,3 +628,56 @@ def test_benchmark_single_table_with_output_destination(tmp_path):
 
     saved_result = pd.read_csv(f'{output_destination}/SDGym_results_{today_date}/results.csv')
     pd.testing.assert_frame_equal(results, saved_result, check_dtype=False)
+
+
+def test_benchmark_single_table_with_output_destination_multiple_runs(tmp_path):
+    """Test saving in ``output_destination`` with multiple runs.
+
+    Here two benchmark runs are performed with different synthesizers
+    on the same dataset, and the results are saved in the same output directory.
+    The directory contains a `results.csv` file with the combined results
+    and a subdirectory for each synthesizer with its own results.
+    """
+    # Setup
+    output_destination = str(tmp_path / 'benchmark_output')
+    today_date = pd.Timestamp.now().strftime('%m_%d_%Y')
+
+    # Run
+    result_1 = benchmark_single_table(
+        synthesizers=['GaussianCopulaSynthesizer'],
+        sdv_datasets=['expedia_hotel_logs'],
+        output_destination=output_destination,
+    )
+    result_2 = benchmark_single_table(
+        synthesizers=['TVAESynthesizer'],
+        sdv_datasets=['expedia_hotel_logs'],
+        output_destination=output_destination,
+    )
+
+    # Assert
+    final_results = pd.concat([result_1, result_2], ignore_index=True)
+    directions = os.listdir(output_destination)
+    assert f'SDGym_results_{today_date}' in directions
+    for file in directions:
+        if file.endswith('.yaml'):
+            with open(os.path.join(output_destination, file), 'r') as f:
+                metadata = yaml.safe_load(f)
+                assert metadata['completed_date'] is not None
+                assert metadata['sdgym_version'] == sdgym.__version__
+        else:
+            subdirections = os.listdir(os.path.join(output_destination, file))
+            assert set(subdirections) == {'results.csv', f'expedia_hotel_logs_{today_date}'}
+            synthesizer_directions = os.listdir(
+                os.path.join(output_destination, file, f'expedia_hotel_logs_{today_date}')
+            )
+            assert set(synthesizer_directions) == {'TVAESynthesizer', 'GaussianCopulaSynthesizer'}
+            for synthesizer in synthesizer_directions:
+                synthesizer_files = os.listdir(
+                    os.path.join(
+                        output_destination, file, f'expedia_hotel_logs_{today_date}', synthesizer
+                    )
+                )
+                assert set(synthesizer_files) == {'synthesizer.pkl', 'synthetic_data.csv'}
+
+    saved_result = pd.read_csv(f'{output_destination}/SDGym_results_{today_date}/results.csv')
+    pd.testing.assert_frame_equal(final_results, saved_result, check_dtype=False)
diff --git a/tests/unit/test_benchmark.py b/tests/unit/test_benchmark.py
index 9ed5c144..c2a60c66 100644
--- a/tests/unit/test_benchmark.py
+++ b/tests/unit/test_benchmark.py
@@ -357,7 +357,6 @@ def test__validate_output_destination(tmp_path):
         'The `output_destination` parameter cannot be an S3 path. '
         'Please use `benchmark_single_table_aws` instead.'
     )
-    err_3 = re.escape(f'The output path {valid_destination} already exists.')
 
     # Run and Assert
     _validate_output_destination(str(valid_destination))
@@ -367,10 +366,6 @@ def test__validate_output_destination(tmp_path):
     with pytest.raises(ValueError, match=err_2):
         _validate_output_destination(aws_destination)
 
-    valid_destination.mkdir()
-    with pytest.raises(ValueError, match=err_3):
-        _validate_output_destination(str(valid_destination))
-
 
 @patch('sdgym.benchmark._validate_output_destination')
 def test__setup_output_destination(mock_validate, tmp_path):

From 995ab2eecffb54bce82ab37e367cceca248c8f14 Mon Sep 17 00:00:00 2001
From: R-Palazzo <romainpalazzo@gmail.com>
Date: Fri, 27 Jun 2025 17:08:00 +0100
Subject: [PATCH 08/14] update naming

---
 sdgym/benchmark.py                  |  4 ++--
 tests/integration/test_benchmark.py | 28 +++++++++++++++++-----------
 tests/unit/test_benchmark.py        |  4 ++--
 3 files changed, 21 insertions(+), 15 deletions(-)

diff --git a/sdgym/benchmark.py b/sdgym/benchmark.py
index ee90360b..fe7ee099 100644
--- a/sdgym/benchmark.py
+++ b/sdgym/benchmark.py
@@ -140,8 +140,8 @@ def _setup_output_destination(output_destination, synthesizers, datasets):
             synth_folder.mkdir(parents=True, exist_ok=True)
 
             paths[dataset][synth_name] = {
-                'synthesizer': str(synth_folder / 'synthesizer.pkl'),
-                'synthetic_data': str(synth_folder / 'synthetic_data.csv'),
+                'synthesizer': str(synth_folder / f'{synth_name}_synthesizer.pkl'),
+                'synthetic_data': str(synth_folder / f'{synth_name}_synthetic_data.csv'),
             }
 
     return paths
diff --git a/tests/integration/test_benchmark.py b/tests/integration/test_benchmark.py
index ff4e63f7..a20be7a8 100644
--- a/tests/integration/test_benchmark.py
+++ b/tests/integration/test_benchmark.py
@@ -598,7 +598,7 @@ def test_benchmark_single_table_with_output_destination(tmp_path):
     # Run
     results = benchmark_single_table(
         synthesizers=['GaussianCopulaSynthesizer', 'TVAESynthesizer'],
-        sdv_datasets=['expedia_hotel_logs'],
+        sdv_datasets=['fake_companies'],
         output_destination=output_destination,
     )
 
@@ -613,18 +613,21 @@ def test_benchmark_single_table_with_output_destination(tmp_path):
                 assert metadata['sdgym_version'] == sdgym.__version__
         else:
             subdirections = os.listdir(os.path.join(output_destination, file))
-            assert set(subdirections) == {'results.csv', f'expedia_hotel_logs_{today_date}'}
+            assert set(subdirections) == {'results.csv', f'fake_companies_{today_date}'}
             synthesizer_directions = os.listdir(
-                os.path.join(output_destination, file, f'expedia_hotel_logs_{today_date}')
+                os.path.join(output_destination, file, f'fake_companies_{today_date}')
             )
             assert set(synthesizer_directions) == {'TVAESynthesizer', 'GaussianCopulaSynthesizer'}
             for synthesizer in synthesizer_directions:
                 synthesizer_files = os.listdir(
                     os.path.join(
-                        output_destination, file, f'expedia_hotel_logs_{today_date}', synthesizer
+                        output_destination, file, f'fake_companies_{today_date}', synthesizer
                     )
                 )
-                assert set(synthesizer_files) == {'synthesizer.pkl', 'synthetic_data.csv'}
+                assert set(synthesizer_files) == {
+                    f'{synthesizer}_synthesizer.pkl',
+                    f'{synthesizer}_synthetic_data.csv',
+                }
 
     saved_result = pd.read_csv(f'{output_destination}/SDGym_results_{today_date}/results.csv')
     pd.testing.assert_frame_equal(results, saved_result, check_dtype=False)
@@ -645,12 +648,12 @@ def test_benchmark_single_table_with_output_destination_multiple_runs(tmp_path):
     # Run
     result_1 = benchmark_single_table(
         synthesizers=['GaussianCopulaSynthesizer'],
-        sdv_datasets=['expedia_hotel_logs'],
+        sdv_datasets=['fake_companies'],
         output_destination=output_destination,
     )
     result_2 = benchmark_single_table(
         synthesizers=['TVAESynthesizer'],
-        sdv_datasets=['expedia_hotel_logs'],
+        sdv_datasets=['fake_companies'],
         output_destination=output_destination,
     )
 
@@ -666,18 +669,21 @@ def test_benchmark_single_table_with_output_destination_multiple_runs(tmp_path):
                 assert metadata['sdgym_version'] == sdgym.__version__
         else:
             subdirections = os.listdir(os.path.join(output_destination, file))
-            assert set(subdirections) == {'results.csv', f'expedia_hotel_logs_{today_date}'}
+            assert set(subdirections) == {'results.csv', f'fake_companies_{today_date}'}
             synthesizer_directions = os.listdir(
-                os.path.join(output_destination, file, f'expedia_hotel_logs_{today_date}')
+                os.path.join(output_destination, file, f'fake_companies_{today_date}')
             )
             assert set(synthesizer_directions) == {'TVAESynthesizer', 'GaussianCopulaSynthesizer'}
             for synthesizer in synthesizer_directions:
                 synthesizer_files = os.listdir(
                     os.path.join(
-                        output_destination, file, f'expedia_hotel_logs_{today_date}', synthesizer
+                        output_destination, file, f'fake_companies_{today_date}', synthesizer
                     )
                 )
-                assert set(synthesizer_files) == {'synthesizer.pkl', 'synthetic_data.csv'}
+                assert set(synthesizer_files) == {
+                    f'{synthesizer}_synthesizer.pkl',
+                    f'{synthesizer}_synthetic_data.csv',
+                }
 
     saved_result = pd.read_csv(f'{output_destination}/SDGym_results_{today_date}/results.csv')
     pd.testing.assert_frame_equal(final_results, saved_result, check_dtype=False)
diff --git a/tests/unit/test_benchmark.py b/tests/unit/test_benchmark.py
index c2a60c66..e003e997 100644
--- a/tests/unit/test_benchmark.py
+++ b/tests/unit/test_benchmark.py
@@ -388,10 +388,10 @@ def test__setup_output_destination(mock_validate, tmp_path):
             **{
                 synth: {
                     'synthesizer': str(
-                        base_path / f'{dataset}_{today}' / synth / 'synthesizer.pkl'
+                        base_path / f'{dataset}_{today}' / synth / f'{synth}_synthesizer.pkl'
                     ),
                     'synthetic_data': str(
-                        base_path / f'{dataset}_{today}' / synth / 'synthetic_data.csv'
+                        base_path / f'{dataset}_{today}' / synth / f'{synth}_synthetic_data.csv'
                     ),
                 }
                 for synth in synthesizers

From 1b70b044812aa64b6cc025cc827646899ca80c8e Mon Sep 17 00:00:00 2001
From: R-Palazzo <romainpalazzo@gmail.com>
Date: Mon, 7 Jul 2025 12:38:59 +0200
Subject: [PATCH 09/14] update warning message

---
 sdgym/benchmark.py           | 12 +++++-------
 tests/unit/test_benchmark.py | 23 ++++++++++++-----------
 2 files changed, 17 insertions(+), 18 deletions(-)

diff --git a/sdgym/benchmark.py b/sdgym/benchmark.py
index fe7ee099..73edc828 100644
--- a/sdgym/benchmark.py
+++ b/sdgym/benchmark.py
@@ -856,14 +856,13 @@ def _create_instance_on_ec2(script_content):
 
 
 def _handle_deprecated_parameters(
-    output_filepath, detailed_results_folder, multi_processing_config, run_on_ec2
+    output_filepath, detailed_results_folder, multi_processing_config
 ):
     """Handle deprecated parameters and issue warnings."""
     parameters_to_deprecate = {
         'output_filepath': output_filepath,
         'detailed_results_folder': detailed_results_folder,
         'multi_processing_config': multi_processing_config,
-        'run_on_ec2': run_on_ec2,
     }
     parameters = []
     for name, value in parameters_to_deprecate.items():
@@ -873,9 +872,10 @@ def _handle_deprecated_parameters(
     if parameters:
         parameters = "', '".join(sorted(parameters))
         message = (
-            f"Parameters '{parameters}' are deprecated in the `benchmark_single_table` "
+            f"Parameters '{parameters}' are deprecated in the 'benchmark_single_table' "
             'function and will be removed in October 2025. '
-            'Please consider using `output_destination` instead.'
+            "For saving results, please use the 'output_destination' parameter. For running SDGym"
+            " remotely on AWS please use the 'benchmark_single_table_aws' method."
         )
         warnings.warn(message, FutureWarning)
 
@@ -1024,9 +1024,7 @@ def benchmark_single_table(
         pandas.DataFrame:
             A table containing one row per synthesizer + dataset + metric.
     """
-    _handle_deprecated_parameters(
-        output_filepath, detailed_results_folder, multi_processing_config, run_on_ec2
-    )
+    _handle_deprecated_parameters(output_filepath, detailed_results_folder, multi_processing_config)
     if run_on_ec2:
         print("This will create an instance for the current AWS user's account.")  # noqa
         if output_filepath is not None:
diff --git a/tests/unit/test_benchmark.py b/tests/unit/test_benchmark.py
index e003e997..397b1889 100644
--- a/tests/unit/test_benchmark.py
+++ b/tests/unit/test_benchmark.py
@@ -59,7 +59,7 @@ def test_benchmark_single_table_deprecated_params(mock_handle_deprecated, tqdm_m
     )
 
     # Assert
-    mock_handle_deprecated.assert_called_once_with(None, None, None, False)
+    mock_handle_deprecated.assert_called_once_with(None, None, None)
     tqdm_mock.assert_called_once_with(ANY, total=1, position=0, leave=True)
 
 
@@ -321,26 +321,27 @@ def test__handle_deprecated_parameters():
     output_filepath = 's3://BucketName/path'
     detailed_results_folder = 'mock/path'
     multi_processing_config = {'num_processes': 4}
-    run_on_ec2 = True
-    expected_message_1 = re.escape(
+    expected_message_1 = (
         "Parameters 'detailed_results_folder', 'output_filepath' are deprecated in the "
-        '`benchmark_single_table` function and will be removed in October 2025. Please '
-        'consider using `output_destination` instead.'
+        "'benchmark_single_table' function and will be removed in October 2025. For saving"
+        " results, please use the 'output_destination' parameter. For running SDGym remotely"
+        " on AWS please use the 'benchmark_single_table_aws' method."
     )
-    expected_message_2 = re.escape(
+    expected_message_2 = (
         "Parameters 'detailed_results_folder', 'multi_processing_config', 'output_filepath'"
-        ", 'run_on_ec2' are deprecated in the `benchmark_single_table` function and will be"
-        ' removed in October 2025. Please consider using `output_destination` instead.'
+        " are deprecated in the 'benchmark_single_table' function and will be removed in October"
+        " 2025. For saving results, please use the 'output_destination' parameter. For running"
+        " SDGym remotely on AWS please use the 'benchmark_single_table_aws' method."
     )
 
     # Run and Assert
-    _handle_deprecated_parameters(None, None, None, False)
+    _handle_deprecated_parameters(None, None, None)
     with pytest.warns(FutureWarning, match=expected_message_1):
-        _handle_deprecated_parameters(output_filepath, detailed_results_folder, None, False)
+        _handle_deprecated_parameters(output_filepath, detailed_results_folder, None)
 
     with pytest.warns(FutureWarning, match=expected_message_2):
         _handle_deprecated_parameters(
-            output_filepath, detailed_results_folder, multi_processing_config, run_on_ec2
+            output_filepath, detailed_results_folder, multi_processing_config
         )
 
 

From 341c77d0b2307e4fa4d355d171b723ae9adcb5cf Mon Sep 17 00:00:00 2001
From: R-Palazzo <romainpalazzo@gmail.com>
Date: Mon, 7 Jul 2025 14:49:30 +0200
Subject: [PATCH 10/14] add a locker to serialize savings

---
 pyproject.toml     |  1 +
 sdgym/benchmark.py | 15 +++++++++++----
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 0553c69f..6d37e224 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -56,6 +56,7 @@ dependencies = [
     'rdt>=1.17.0',
     'sdmetrics>=0.20.1',
     'sdv>=1.21.0',
+    'portalocker>=3.2.0',
 ]
 
 [project.urls]
diff --git a/sdgym/benchmark.py b/sdgym/benchmark.py
index 73edc828..95e078c8 100644
--- a/sdgym/benchmark.py
+++ b/sdgym/benchmark.py
@@ -14,6 +14,7 @@
 from datetime import datetime
 from importlib.metadata import version
 from pathlib import Path
+import portalocker
 
 import boto3
 import cloudpickle
@@ -548,6 +549,15 @@ def _format_output(
 
     return scores
 
+def safe_append(scores, result_file):
+    result_file = Path(result_file)
+    result_file.parent.mkdir(parents=True, exist_ok=True)
+
+    with open(result_file, 'a+') as f:
+        portalocker.lock(f, portalocker.LOCK_EX)
+        f.seek(0, 2)
+        is_empty = f.tell() == 0
+        scores.to_csv(f, index=False, header=is_empty)
 
 def _run_job(args):
     # Reset random seed
@@ -622,10 +632,7 @@ def _run_job(args):
         synth_path = Path(synthesizer_path['synthesizer'])
         root_path = synth_path.parents[2]
         result_file = root_path / 'results.csv'
-        if not result_file.exists():
-            scores.to_csv(result_file, index=False, mode='w')
-        else:
-            scores.to_csv(result_file, index=False, mode='a', header=False)
+        safe_append(scores, result_file)
 
     return scores
 

From 0f81febd551a9ef69a03005dfc1b260b3d0cc2ee Mon Sep 17 00:00:00 2001
From: R-Palazzo <romainpalazzo@gmail.com>
Date: Mon, 7 Jul 2025 14:59:46 +0200
Subject: [PATCH 11/14] update portallocker version

---
 pyproject.toml     | 2 +-
 sdgym/benchmark.py | 8 +++++---
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 6d37e224..38add8ae 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -56,7 +56,7 @@ dependencies = [
     'rdt>=1.17.0',
     'sdmetrics>=0.20.1',
     'sdv>=1.21.0',
-    'portalocker>=3.2.0',
+    'portalocker>=3.0.0',
 ]
 
 [project.urls]
diff --git a/sdgym/benchmark.py b/sdgym/benchmark.py
index 95e078c8..b88118b6 100644
--- a/sdgym/benchmark.py
+++ b/sdgym/benchmark.py
@@ -14,13 +14,13 @@
 from datetime import datetime
 from importlib.metadata import version
 from pathlib import Path
-import portalocker
 
 import boto3
 import cloudpickle
 import compress_pickle
 import numpy as np
 import pandas as pd
+import portalocker
 import tqdm
 import yaml
 from sdmetrics.reports.multi_table import (
@@ -549,7 +549,8 @@ def _format_output(
 
     return scores
 
-def safe_append(scores, result_file):
+
+def _safe_append(scores, result_file):
     result_file = Path(result_file)
     result_file.parent.mkdir(parents=True, exist_ok=True)
 
@@ -559,6 +560,7 @@ def safe_append(scores, result_file):
         is_empty = f.tell() == 0
         scores.to_csv(f, index=False, header=is_empty)
 
+
 def _run_job(args):
     # Reset random seed
     np.random.seed()
@@ -632,7 +634,7 @@ def _run_job(args):
         synth_path = Path(synthesizer_path['synthesizer'])
         root_path = synth_path.parents[2]
         result_file = root_path / 'results.csv'
-        safe_append(scores, result_file)
+        _safe_append(scores, result_file)
 
     return scores
 

From a1502a65c8788b0b0ba22cfb20b285787ca45365 Mon Sep 17 00:00:00 2001
From: R-Palazzo <romainpalazzo@gmail.com>
Date: Mon, 7 Jul 2025 15:00:41 +0200
Subject: [PATCH 12/14] cleaning

---
 sdgym/benchmark.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/sdgym/benchmark.py b/sdgym/benchmark.py
index b88118b6..df255e2c 100644
--- a/sdgym/benchmark.py
+++ b/sdgym/benchmark.py
@@ -553,12 +553,11 @@ def _format_output(
 def _safe_append(scores, result_file):
     result_file = Path(result_file)
     result_file.parent.mkdir(parents=True, exist_ok=True)
-
-    with open(result_file, 'a+') as f:
-        portalocker.lock(f, portalocker.LOCK_EX)
-        f.seek(0, 2)
-        is_empty = f.tell() == 0
-        scores.to_csv(f, index=False, header=is_empty)
+    with open(result_file, 'a+') as file:
+        portalocker.lock(file, portalocker.LOCK_EX)
+        file.seek(0, 2)
+        is_empty = file.tell() == 0
+        scores.to_csv(file, index=False, header=is_empty)
 
 
 def _run_job(args):

From 1d77b581bdaa77f7d3a907cfa75063763a4f0c84 Mon Sep 17 00:00:00 2001
From: R-Palazzo <romainpalazzo@gmail.com>
Date: Tue, 8 Jul 2025 13:20:58 +0200
Subject: [PATCH 13/14] remove portallocker and save files separately + all
 together

---
 pyproject.toml                      |  1 -
 sdgym/benchmark.py                  | 26 ++++++++++------------
 tests/integration/test_benchmark.py | 34 +++++++++++++++++++++++++++--
 tests/unit/test_benchmark.py        |  3 +++
 4 files changed, 46 insertions(+), 18 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 38add8ae..0553c69f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -56,7 +56,6 @@ dependencies = [
     'rdt>=1.17.0',
     'sdmetrics>=0.20.1',
     'sdv>=1.21.0',
-    'portalocker>=3.0.0',
 ]
 
 [project.urls]
diff --git a/sdgym/benchmark.py b/sdgym/benchmark.py
index df255e2c..492f90d7 100644
--- a/sdgym/benchmark.py
+++ b/sdgym/benchmark.py
@@ -20,7 +20,6 @@
 import compress_pickle
 import numpy as np
 import pandas as pd
-import portalocker
 import tqdm
 import yaml
 from sdmetrics.reports.multi_table import (
@@ -143,6 +142,7 @@ def _setup_output_destination(output_destination, synthesizers, datasets):
             paths[dataset][synth_name] = {
                 'synthesizer': str(synth_folder / f'{synth_name}_synthesizer.pkl'),
                 'synthetic_data': str(synth_folder / f'{synth_name}_synthetic_data.csv'),
+                'benchmark_result': str(synth_folder / f'{synth_name}_benchmark_result.csv'),
             }
 
     return paths
@@ -550,16 +550,6 @@ def _format_output(
     return scores
 
 
-def _safe_append(scores, result_file):
-    result_file = Path(result_file)
-    result_file.parent.mkdir(parents=True, exist_ok=True)
-    with open(result_file, 'a+') as file:
-        portalocker.lock(file, portalocker.LOCK_EX)
-        file.seek(0, 2)
-        is_empty = file.tell() == 0
-        scores.to_csv(file, index=False, header=is_empty)
-
-
 def _run_job(args):
     # Reset random seed
     np.random.seed()
@@ -630,10 +620,7 @@ def _run_job(args):
     )
 
     if synthesizer_path is not None:
-        synth_path = Path(synthesizer_path['synthesizer'])
-        root_path = synth_path.parents[2]
-        result_file = root_path / 'results.csv'
-        _safe_append(scores, result_file)
+        scores.to_csv(synthesizer_path['benchmark_result'], index=False)
 
     return scores
 
@@ -691,6 +678,15 @@ def _run_jobs(multi_processing_config, job_args_list, show_progress):
         raise SDGymError('No valid Dataset/Synthesizer combination given.')
 
     scores = pd.concat(scores, ignore_index=True)
+    output_directions = job_args_list[0][-1]
+    if output_directions and isinstance(output_directions, dict):
+        synth_path = Path(output_directions['synthesizer'])
+        root_path = synth_path.parents[2]
+        result_file = root_path / 'results.csv'
+        if not result_file.exists():
+            scores.to_csv(result_file, index=False, mode='w')
+        else:
+            scores.to_csv(result_file, index=False, mode='a', header=False)
 
     return scores
 
diff --git a/tests/integration/test_benchmark.py b/tests/integration/test_benchmark.py
index a20be7a8..e6d0dd0c 100644
--- a/tests/integration/test_benchmark.py
+++ b/tests/integration/test_benchmark.py
@@ -604,6 +604,7 @@ def test_benchmark_single_table_with_output_destination(tmp_path):
 
     # Assert
     directions = os.listdir(output_destination)
+    score_saved_separately = pd.DataFrame()
     assert f'SDGym_results_{today_date}' in directions
     for file in directions:
         if file.endswith('.yaml'):
@@ -618,7 +619,7 @@ def test_benchmark_single_table_with_output_destination(tmp_path):
                 os.path.join(output_destination, file, f'fake_companies_{today_date}')
             )
             assert set(synthesizer_directions) == {'TVAESynthesizer', 'GaussianCopulaSynthesizer'}
-            for synthesizer in synthesizer_directions:
+            for synthesizer in sorted(synthesizer_directions):
                 synthesizer_files = os.listdir(
                     os.path.join(
                         output_destination, file, f'fake_companies_{today_date}', synthesizer
@@ -627,10 +628,24 @@ def test_benchmark_single_table_with_output_destination(tmp_path):
                 assert set(synthesizer_files) == {
                     f'{synthesizer}_synthesizer.pkl',
                     f'{synthesizer}_synthetic_data.csv',
+                    f'{synthesizer}_benchmark_result.csv',
                 }
+                score = pd.read_csv(
+                    os.path.join(
+                        output_destination,
+                        file,
+                        f'fake_companies_{today_date}',
+                        synthesizer,
+                        f'{synthesizer}_benchmark_result.csv',
+                    )
+                )
+                score_saved_separately = pd.concat(
+                    [score_saved_separately, score], ignore_index=True
+                )
 
     saved_result = pd.read_csv(f'{output_destination}/SDGym_results_{today_date}/results.csv')
     pd.testing.assert_frame_equal(results, saved_result, check_dtype=False)
+    pd.testing.assert_frame_equal(results, score_saved_separately, check_dtype=False)
 
 
 def test_benchmark_single_table_with_output_destination_multiple_runs(tmp_path):
@@ -659,6 +674,7 @@ def test_benchmark_single_table_with_output_destination_multiple_runs(tmp_path):
 
     # Assert
     final_results = pd.concat([result_1, result_2], ignore_index=True)
+    score_saved_separately = pd.DataFrame()
     directions = os.listdir(output_destination)
     assert f'SDGym_results_{today_date}' in directions
     for file in directions:
@@ -674,7 +690,7 @@ def test_benchmark_single_table_with_output_destination_multiple_runs(tmp_path):
                 os.path.join(output_destination, file, f'fake_companies_{today_date}')
             )
             assert set(synthesizer_directions) == {'TVAESynthesizer', 'GaussianCopulaSynthesizer'}
-            for synthesizer in synthesizer_directions:
+            for synthesizer in sorted(synthesizer_directions):
                 synthesizer_files = os.listdir(
                     os.path.join(
                         output_destination, file, f'fake_companies_{today_date}', synthesizer
@@ -683,7 +699,21 @@ def test_benchmark_single_table_with_output_destination_multiple_runs(tmp_path):
                 assert set(synthesizer_files) == {
                     f'{synthesizer}_synthesizer.pkl',
                     f'{synthesizer}_synthetic_data.csv',
+                    f'{synthesizer}_benchmark_result.csv',
                 }
+                score = pd.read_csv(
+                    os.path.join(
+                        output_destination,
+                        file,
+                        f'fake_companies_{today_date}',
+                        synthesizer,
+                        f'{synthesizer}_benchmark_result.csv',
+                    )
+                )
+                score_saved_separately = pd.concat(
+                    [score_saved_separately, score], ignore_index=True
+                )
 
     saved_result = pd.read_csv(f'{output_destination}/SDGym_results_{today_date}/results.csv')
     pd.testing.assert_frame_equal(final_results, saved_result, check_dtype=False)
+    pd.testing.assert_frame_equal(final_results, score_saved_separately, check_dtype=False)
diff --git a/tests/unit/test_benchmark.py b/tests/unit/test_benchmark.py
index 397b1889..f85358d7 100644
--- a/tests/unit/test_benchmark.py
+++ b/tests/unit/test_benchmark.py
@@ -394,6 +394,9 @@ def test__setup_output_destination(mock_validate, tmp_path):
                     'synthetic_data': str(
                         base_path / f'{dataset}_{today}' / synth / f'{synth}_synthetic_data.csv'
                     ),
+                    'benchmark_result': str(
+                        base_path / f'{dataset}_{today}' / synth / f'{synth}_benchmark_result.csv'
+                    ),
                 }
                 for synth in synthesizers
             },

From 662786d61c83a81ccb9d4c77c0537b194de8612f Mon Sep 17 00:00:00 2001
From: R-Palazzo <romainpalazzo@gmail.com>
Date: Thu, 10 Jul 2025 10:43:20 +0200
Subject: [PATCH 14/14] restructure files

---
 sdgym/benchmark.py                  |  38 ++++---
 tests/integration/test_benchmark.py | 170 +++++++++++++++-------------
 tests/unit/test_benchmark.py        |  29 +++--
 3 files changed, 129 insertions(+), 108 deletions(-)

diff --git a/sdgym/benchmark.py b/sdgym/benchmark.py
index 492f90d7..17db2e6f 100644
--- a/sdgym/benchmark.py
+++ b/sdgym/benchmark.py
@@ -6,8 +6,8 @@
 import multiprocessing
 import os
 import pickle
+import re
 import tracemalloc
-import uuid
 import warnings
 from collections import defaultdict
 from contextlib import contextmanager
@@ -128,12 +128,22 @@ def _setup_output_destination(output_destination, synthesizers, datasets):
     today = datetime.today().strftime('%m_%d_%Y')
     top_folder = output_path / f'SDGym_results_{today}'
     top_folder.mkdir(parents=True, exist_ok=True)
+    pattern = re.compile(rf'run_{re.escape(today)}_(\d+)\.yaml$')
+    increments = []
+    for file in top_folder.glob(f'run_{today}_*.yaml'):
+        match = pattern.match(file.name)
+        if match:
+            increments.append(int(match.group(1)))
+
+    if increments:
+        next_increment = max(increments) + 1
+    else:
+        next_increment = 1
 
     paths = defaultdict(dict)
     for dataset in datasets:
         dataset_folder = top_folder / f'{dataset}_{today}'
         dataset_folder.mkdir(parents=True, exist_ok=True)
-        paths[dataset]['meta'] = str(dataset_folder / 'meta.yaml')
 
         for synth_name in synthesizers:
             synth_folder = dataset_folder / synth_name
@@ -143,6 +153,8 @@ def _setup_output_destination(output_destination, synthesizers, datasets):
                 'synthesizer': str(synth_folder / f'{synth_name}_synthesizer.pkl'),
                 'synthetic_data': str(synth_folder / f'{synth_name}_synthetic_data.csv'),
                 'benchmark_result': str(synth_folder / f'{synth_name}_benchmark_result.csv'),
+                'run_id': str(top_folder / f'run_{today}_{next_increment}.yaml'),
+                'results': str(top_folder / f'results_{today}_{next_increment}.csv'),
             }
 
     return paths
@@ -680,9 +692,7 @@ def _run_jobs(multi_processing_config, job_args_list, show_progress):
     scores = pd.concat(scores, ignore_index=True)
     output_directions = job_args_list[0][-1]
     if output_directions and isinstance(output_directions, dict):
-        synth_path = Path(output_directions['synthesizer'])
-        root_path = synth_path.parents[2]
-        result_file = root_path / 'results.csv'
+        result_file = Path(output_directions['results'])
         if not result_file.exists():
             scores.to_csv(result_file, index=False, mode='w')
         else:
@@ -897,9 +907,11 @@ def _validate_output_destination(output_destination):
         )
 
 
-def _write_run_id_file(output_destination, synthesizers, job_args_list):
+def _write_run_id_file(synthesizers, job_args_list):
     jobs = [[job[-3], job[0]['name']] for job in job_args_list]
-    run_id = str(uuid.uuid4())[:8]
+    output_directions = job_args_list[0][-1]
+    path = output_directions['run_id']
+    run_id = Path(path).stem
     metadata = {
         'run_id': run_id,
         'starting_date': datetime.today().strftime('%m_%d_%Y %H:%M:%S'),
@@ -915,14 +927,11 @@ def _write_run_id_file(output_destination, synthesizers, job_args_list):
         elif 'sdv' not in metadata.keys():
             metadata['sdv_version'] = version('sdv')
 
-    with open(f'{output_destination}/run_{run_id}.yaml', 'w') as file:
+    with open(path, 'w') as file:
         yaml.dump(metadata, file)
 
-    return run_id
-
 
-def _update_run_id_file(output_destination, run_id):
-    run_file = Path(output_destination) / f'run_{run_id}.yaml'
+def _update_run_id_file(run_file):
     with open(run_file, 'r') as f:
         run_data = yaml.safe_load(f) or {}
 
@@ -1055,7 +1064,7 @@ def benchmark_single_table(
         custom_synthesizers,
     )
     if output_destination is not None:
-        run_id = _write_run_id_file(output_destination, synthesizers, job_args_list)
+        _write_run_id_file(synthesizers, job_args_list)
 
     if job_args_list:
         scores = _run_jobs(multi_processing_config, job_args_list, show_progress)
@@ -1073,6 +1082,7 @@ def benchmark_single_table(
         write_csv(scores, output_filepath, None, None)
 
     if output_destination is not None:
-        _update_run_id_file(output_destination, run_id)
+        run_id_filename = job_args_list[0][-1]['run_id']
+        _update_run_id_file(run_id_filename)
 
     return scores
diff --git a/tests/integration/test_benchmark.py b/tests/integration/test_benchmark.py
index e6d0dd0c..e3932a27 100644
--- a/tests/integration/test_benchmark.py
+++ b/tests/integration/test_benchmark.py
@@ -605,45 +605,49 @@ def test_benchmark_single_table_with_output_destination(tmp_path):
     # Assert
     directions = os.listdir(output_destination)
     score_saved_separately = pd.DataFrame()
-    assert f'SDGym_results_{today_date}' in directions
-    for file in directions:
-        if file.endswith('.yaml'):
-            with open(os.path.join(output_destination, file), 'r') as f:
-                metadata = yaml.safe_load(f)
-                assert metadata['completed_date'] is not None
-                assert metadata['sdgym_version'] == sdgym.__version__
-        else:
-            subdirections = os.listdir(os.path.join(output_destination, file))
-            assert set(subdirections) == {'results.csv', f'fake_companies_{today_date}'}
-            synthesizer_directions = os.listdir(
-                os.path.join(output_destination, file, f'fake_companies_{today_date}')
+    assert directions == [f'SDGym_results_{today_date}']
+    subdirections = os.listdir(os.path.join(output_destination, directions[0]))
+    assert set(subdirections) == {
+        f'results_{today_date}_1.csv',
+        f'fake_companies_{today_date}',
+        f'run_{today_date}_1.yaml',
+    }
+    with open(
+        os.path.join(output_destination, directions[0], f'run_{today_date}_1.yaml'), 'r'
+    ) as f:
+        metadata = yaml.safe_load(f)
+        assert metadata['completed_date'] is not None
+        assert metadata['sdgym_version'] == sdgym.__version__
+
+    synthesizer_directions = os.listdir(
+        os.path.join(output_destination, directions[0], f'fake_companies_{today_date}')
+    )
+    assert set(synthesizer_directions) == {'TVAESynthesizer', 'GaussianCopulaSynthesizer'}
+    for synthesizer in sorted(synthesizer_directions):
+        synthesizer_files = os.listdir(
+            os.path.join(
+                output_destination, directions[0], f'fake_companies_{today_date}', synthesizer
             )
-            assert set(synthesizer_directions) == {'TVAESynthesizer', 'GaussianCopulaSynthesizer'}
-            for synthesizer in sorted(synthesizer_directions):
-                synthesizer_files = os.listdir(
-                    os.path.join(
-                        output_destination, file, f'fake_companies_{today_date}', synthesizer
-                    )
-                )
-                assert set(synthesizer_files) == {
-                    f'{synthesizer}_synthesizer.pkl',
-                    f'{synthesizer}_synthetic_data.csv',
-                    f'{synthesizer}_benchmark_result.csv',
-                }
-                score = pd.read_csv(
-                    os.path.join(
-                        output_destination,
-                        file,
-                        f'fake_companies_{today_date}',
-                        synthesizer,
-                        f'{synthesizer}_benchmark_result.csv',
-                    )
-                )
-                score_saved_separately = pd.concat(
-                    [score_saved_separately, score], ignore_index=True
-                )
-
-    saved_result = pd.read_csv(f'{output_destination}/SDGym_results_{today_date}/results.csv')
+        )
+        assert set(synthesizer_files) == {
+            f'{synthesizer}_synthesizer.pkl',
+            f'{synthesizer}_synthetic_data.csv',
+            f'{synthesizer}_benchmark_result.csv',
+        }
+        score = pd.read_csv(
+            os.path.join(
+                output_destination,
+                directions[0],
+                f'fake_companies_{today_date}',
+                synthesizer,
+                f'{synthesizer}_benchmark_result.csv',
+            )
+        )
+        score_saved_separately = pd.concat([score_saved_separately, score], ignore_index=True)
+
+    saved_result = pd.read_csv(
+        f'{output_destination}/SDGym_results_{today_date}/results_{today_date}_1.csv'
+    )
     pd.testing.assert_frame_equal(results, saved_result, check_dtype=False)
     pd.testing.assert_frame_equal(results, score_saved_separately, check_dtype=False)
 
@@ -673,47 +677,55 @@ def test_benchmark_single_table_with_output_destination_multiple_runs(tmp_path):
     )
 
     # Assert
-    final_results = pd.concat([result_1, result_2], ignore_index=True)
     score_saved_separately = pd.DataFrame()
     directions = os.listdir(output_destination)
-    assert f'SDGym_results_{today_date}' in directions
-    for file in directions:
-        if file.endswith('.yaml'):
-            with open(os.path.join(output_destination, file), 'r') as f:
-                metadata = yaml.safe_load(f)
-                assert metadata['completed_date'] is not None
-                assert metadata['sdgym_version'] == sdgym.__version__
-        else:
-            subdirections = os.listdir(os.path.join(output_destination, file))
-            assert set(subdirections) == {'results.csv', f'fake_companies_{today_date}'}
-            synthesizer_directions = os.listdir(
-                os.path.join(output_destination, file, f'fake_companies_{today_date}')
+    assert directions == [f'SDGym_results_{today_date}']
+    subdirections = os.listdir(os.path.join(output_destination, directions[0]))
+    assert set(subdirections) == {
+        f'results_{today_date}_1.csv',
+        f'results_{today_date}_2.csv',
+        f'fake_companies_{today_date}',
+        f'run_{today_date}_1.yaml',
+        f'run_{today_date}_2.yaml',
+    }
+    with open(
+        os.path.join(output_destination, directions[0], f'run_{today_date}_1.yaml'), 'r'
+    ) as f:
+        metadata = yaml.safe_load(f)
+        assert metadata['completed_date'] is not None
+        assert metadata['sdgym_version'] == sdgym.__version__
+
+    synthesizer_directions = os.listdir(
+        os.path.join(output_destination, directions[0], f'fake_companies_{today_date}')
+    )
+    assert set(synthesizer_directions) == {'TVAESynthesizer', 'GaussianCopulaSynthesizer'}
+    for synthesizer in sorted(synthesizer_directions):
+        synthesizer_files = os.listdir(
+            os.path.join(
+                output_destination, directions[0], f'fake_companies_{today_date}', synthesizer
             )
-            assert set(synthesizer_directions) == {'TVAESynthesizer', 'GaussianCopulaSynthesizer'}
-            for synthesizer in sorted(synthesizer_directions):
-                synthesizer_files = os.listdir(
-                    os.path.join(
-                        output_destination, file, f'fake_companies_{today_date}', synthesizer
-                    )
-                )
-                assert set(synthesizer_files) == {
-                    f'{synthesizer}_synthesizer.pkl',
-                    f'{synthesizer}_synthetic_data.csv',
-                    f'{synthesizer}_benchmark_result.csv',
-                }
-                score = pd.read_csv(
-                    os.path.join(
-                        output_destination,
-                        file,
-                        f'fake_companies_{today_date}',
-                        synthesizer,
-                        f'{synthesizer}_benchmark_result.csv',
-                    )
-                )
-                score_saved_separately = pd.concat(
-                    [score_saved_separately, score], ignore_index=True
-                )
-
-    saved_result = pd.read_csv(f'{output_destination}/SDGym_results_{today_date}/results.csv')
-    pd.testing.assert_frame_equal(final_results, saved_result, check_dtype=False)
-    pd.testing.assert_frame_equal(final_results, score_saved_separately, check_dtype=False)
+        )
+        assert set(synthesizer_files) == {
+            f'{synthesizer}_synthesizer.pkl',
+            f'{synthesizer}_synthetic_data.csv',
+            f'{synthesizer}_benchmark_result.csv',
+        }
+        score = pd.read_csv(
+            os.path.join(
+                output_destination,
+                directions[0],
+                f'fake_companies_{today_date}',
+                synthesizer,
+                f'{synthesizer}_benchmark_result.csv',
+            )
+        )
+        score_saved_separately = pd.concat([score_saved_separately, score], ignore_index=True)
+
+    saved_result_1 = pd.read_csv(
+        f'{output_destination}/SDGym_results_{today_date}/results_{today_date}_1.csv'
+    )
+    saved_result_2 = pd.read_csv(
+        f'{output_destination}/SDGym_results_{today_date}/results_{today_date}_2.csv'
+    )
+    pd.testing.assert_frame_equal(result_1, saved_result_1, check_dtype=False)
+    pd.testing.assert_frame_equal(result_2, saved_result_2, check_dtype=False)
diff --git a/tests/unit/test_benchmark.py b/tests/unit/test_benchmark.py
index f85358d7..086b6098 100644
--- a/tests/unit/test_benchmark.py
+++ b/tests/unit/test_benchmark.py
@@ -2,6 +2,7 @@
 import re
 from datetime import datetime
 from importlib.metadata import version
+from pathlib import Path
 from unittest.mock import ANY, MagicMock, patch
 
 import pandas as pd
@@ -385,7 +386,6 @@ def test__setup_output_destination(mock_validate, tmp_path):
     # Assert
     expected = {
         dataset: {
-            'meta': str(base_path / f'{dataset}_{today}' / 'meta.yaml'),
             **{
                 synth: {
                     'synthesizer': str(
@@ -397,6 +397,8 @@ def test__setup_output_destination(mock_validate, tmp_path):
                     'benchmark_result': str(
                         base_path / f'{dataset}_{today}' / synth / f'{synth}_benchmark_result.csv'
                     ),
+                    'run_id': str(base_path / f'run_{today}_1.yaml'),
+                    'results': str(base_path / f'results_{today}_1.csv'),
                 }
                 for synth in synthesizers
             },
@@ -408,32 +410,29 @@ def test__setup_output_destination(mock_validate, tmp_path):
     assert json.loads(json.dumps(result_2)) == expected
 
 
-@patch('sdgym.benchmark.uuid.uuid4')
 @patch('sdgym.benchmark.datetime')
-def test__write_run_id_file(mock_datetime, mock_uuid, tmp_path):
+def test__write_run_id_file(mock_datetime, tmp_path):
     """Test the `_write_run_id_file` method."""
     # Setup
     output_destination = tmp_path / 'output_destination'
     output_destination.mkdir()
-    mock_uuid.return_value = '123456789999'
     mock_datetime.today.return_value.strftime.return_value = '06_26_2025'
+    file_name = {'run_id': f'{output_destination}/run_06_26_2025_1.yaml'}
     jobs = [
-        ({'name': 'GaussianCopulaSynthesizer'}, 'adult', None, None),
+        ({'name': 'GaussianCopulaSynthesizer'}, 'adult', None, file_name),
         ({'name': 'CTGANSynthesizer'}, 'census', None, None),
     ]
     expected_jobs = [['adult', 'GaussianCopulaSynthesizer'], ['census', 'CTGANSynthesizer']]
     synthesizers = ['GaussianCopulaSynthesizer', 'CTGANSynthesizer', 'RealTabFormerSynthesizer']
 
     # Run
-    run_id = _write_run_id_file(output_destination, synthesizers, jobs)
+    _write_run_id_file(synthesizers, jobs)
 
     # Assert
-    assert run_id == '12345678'
-    run_id_file = output_destination / 'run_12345678.yaml'
-    assert run_id_file.exists()
-    with open(run_id_file, 'r') as file:
+    assert Path(file_name['run_id']).exists()
+    with open(file_name['run_id'], 'r') as file:
         run_id_data = yaml.safe_load(file)
-        assert run_id_data['run_id'] == '12345678'
+        assert run_id_data['run_id'] == 'run_06_26_2025_1'
         assert run_id_data['starting_date'] == '06_26_2025'
         assert run_id_data['jobs'] == expected_jobs
         assert run_id_data['sdgym_version'] == version('sdgym')
@@ -448,15 +447,15 @@ def test__update_run_id_file(mock_datetime, tmp_path):
     # Setup
     output_destination = tmp_path / 'output_destination'
     output_destination.mkdir()
-    run_id = '12345678'
+    metadata = {'run_id': 'run_06_25_2025_1', 'starting_date': '06_25_2025', 'completed_date': None}
+    run_id_file = output_destination / 'run_06_25_2025_1.yaml'
+    run_id = 'run_06_25_2025_1'
     mock_datetime.today.return_value.strftime.return_value = '06_26_2025'
-    metadata = {'run_id': run_id, 'starting_date': '06_25_2025', 'completed_date': None}
-    run_id_file = output_destination / f'run_{run_id}.yaml'
     with open(run_id_file, 'w') as file:
         yaml.dump(metadata, file)
 
     # Run
-    _update_run_id_file(output_destination, run_id)
+    _update_run_id_file(run_id_file)
 
     # Assert
     with open(run_id_file, 'r') as file: