alright MLbench finalized for real

inikishev · inikishev · commit 102003017543 · 2025-10-12T10:17:10.000+03:00
diff --git a/visualbench/benchmark.py b/visualbench/benchmark.py
@@ -520,6 +520,7 @@ def _train_epoch(self, optimizer):
 
     def _test_epoch(self):
         assert self._dltest is not None
+        test_start = time.time()
         self.eval()
         batch_backup = self.batch
 
@@ -528,6 +529,8 @@ def _test_epoch(self):
             self._one_step(optimizer=None)
 
         self._last_test_time = time.time()
+        self.log("test time", self._last_test_time - test_start, plot=False)
+
         self._last_test_pass = self.num_passes
         self.batch = batch_backup
         self.train()
diff --git a/visualbench/logger.py b/visualbench/logger.py
@@ -27,6 +27,7 @@ def min(self, metric): return np.min(self.list(metric))
     def nanmin(self, metric): return np.nanmin(self.list(metric))
     def max(self, metric): return np.max(self.list(metric))
     def nanmax(self, metric): return np.nanmax(self.list(metric))
+    def sum(self, metric): return np.sum(self.list(metric))
 
     def interp(self, metric: str) -> np.ndarray:
         """Returns a list of values for a given key, interpolating missing steps."""
diff --git a/visualbench/models/ode.py b/visualbench/models/ode.py
@@ -16,7 +16,7 @@ def forward(self, t, z: torch.Tensor):
 
 # test 'dopri5', 'adams'
 class NeuralODE(nn.Module):
-    def __init__(self, in_channels: int, out_channels: int, width: int, act_cls = F.softplus, layer_norm=False, T = 10., steps = 2, adjoint = False, method = 'implicit_adams'):
+    def __init__(self, in_channels: int, out_channels: int, width: int, act_cls = torch.nn.Softplus, layer_norm=False, T = 10., steps = 2, adjoint = False, method = 'implicit_adams'):
         super().__init__()
         self.in_layer = nn.Linear(in_channels, width)
         self.ode_func = _ODELinear(width, act_cls = act_cls, layer_norm=layer_norm)
diff --git a/visualbench/runs/benchmark_benchmark.py b/visualbench/runs/benchmark_benchmark.py
@@ -153,7 +153,7 @@ def quickrun(self):
         opt = lambda p, lr: torch.optim.RMSprop(p, lr)
         self.run_optimizer(opt, "RMSprop", tune=True, max_dim=None)
 
-        opt = lambda p, lr: tz.Optimizer(p, tz.m.SOAP(), tz.m.LR(lr))
+        opt = lambda p, lr: tz.Optimizer(p, tz.m.SOAP(max_dim=2048), tz.m.LR(lr))
         self.run_optimizer(opt, "SOAP", tune=True, max_dim=None)
 
 
@@ -198,7 +198,7 @@ def run_stochastic(self):
         opt = lambda p, lr: tz.Optimizer(p, tz.m.GGT(), tz.m.LR(lr))
         self.run_optimizer(opt, "GGT", tune=True, max_dim=None)
 
-        opt = lambda p, lr: tz.Optimizer(p, tz.m.SOAP(), tz.m.LR(lr))
+        opt = lambda p, lr: tz.Optimizer(p, tz.m.SOAP(max_dim=2048), tz.m.LR(lr))
         self.run_optimizer(opt, "SOAP", tune=True, max_dim=None)
 
         # PSGD Kron
diff --git a/visualbench/runs/benchpack.py b/visualbench/runs/benchpack.py
@@ -82,54 +82,110 @@ def run_bench(bench: "Benchmark", task_name: str, passes: int, sec: float, metri
             if max_dim is not None and dim > max_dim: return
 
             start = time.time()
+            test_time = 0
             clean_mem()
 
              # skip CPU because accelerator state can't change.
             if (accelerate) and (Accelerator is not None) and (next(bench.parameters()).is_cuda):
                 accelerator = Accelerator()
                 bench = accelerator.prepare(bench)
 
+            # -------------------------------- logger func ------------------------------- #
             def logger_fn(value: float):
                 if dim > 100_000: clean_mem()
 
+                # set seed
                 torch.manual_seed(0)
                 np.random.seed(0)
                 random.seed(0)
 
+                # run
                 bench.reset().set_performance_mode().set_print_inverval(None)
                 opt = init_fn(opt_fn, bench, value)
                 bench.run(opt, max_passes=passes, max_seconds=sec, test_every_forwards=test_every, num_extra_passes=num_extra_passes, step_callbacks=step_callbacks)
+
+                # print progress
                 if print_progress and bench.seconds_passed is not None and bench.seconds_passed > sec:
                     print(f"{sweep_name}: '{task_name}' timeout, {bench.seconds_passed} > {sec}!")
+
+                # add test time
+                if "test time" in bench.logger:
+                    nonlocal test_time
+                    test_time += bench.logger.sum("test time")
+
                 return bench.logger
 
+            # --------------------------------- single run ------------------------------- #
             if (hyperparam is None) or (not tune):
-                sweep = single_run(logger_fn, metrics=metrics, fixed_hyperparams=fixed_hyperparams, root=root, task_name=task_name, run_name=sweep_name, print_records=print_records, print_progress=print_progress, save=save, load_existing=load_existing)
-
+                sweep = single_run(
+                    logger_fn,
+                    metrics=metrics,
+                    fixed_hyperparams=fixed_hyperparams,
+                    root=root,
+                    task_name=task_name,
+                    run_name=sweep_name,
+                    print_records=print_records,
+                    print_progress=print_progress,
+                    save=save,
+                    load_existing=load_existing,
+                )
+
+            # -------------------------------- mbs search -------------------------------- #
             else:
-                sweep = mbs_search(logger_fn, metrics=metrics, search_hyperparam=hyperparam, fixed_hyperparams=fixed_hyperparams, log_scale=log_scale, grid=grid, step=step, num_candidates=num_candidates, num_binary=max(1, int(num_binary*binary_mul)), num_expansions=num_expansions, rounding=rounding, root=root, task_name=task_name, run_name=sweep_name, print_records=print_records, save=save, load_existing=load_existing, print_progress=print_progress)
-
-            # render video
+                sweep = mbs_search(
+                    logger_fn,
+                    metrics=metrics,
+                    search_hyperparam=hyperparam,
+                    fixed_hyperparams=fixed_hyperparams,
+                    log_scale=log_scale,
+                    grid=grid,
+                    step=step,
+                    num_candidates=num_candidates,
+                    num_binary=max(1, int(num_binary * binary_mul)),
+                    num_expansions=num_expansions,
+                    rounding=rounding,
+                    root=root,
+                    task_name=task_name,
+                    run_name=sweep_name,
+                    print_records=print_records,
+                    save=save,
+                    load_existing=load_existing,
+                    print_progress=print_progress,
+                )
+
+            # ------------------------------- render video ------------------------------- #
             if (render_vids) and (vid_scale is not None) and (self.summaries_root is not None):
                 assert self.summary_dir is not None
                 for metric, maximize in _target_metrics_to_dict(metrics).items():
+
+                    # check if video already exists and skip if it does
                     video_path = os.path.join(self.summary_dir, f'{task_name} - {metric}')
                     if os.path.exists(f'{video_path}.mp4'): continue
 
+                    # find hyperparameter value of the best run
                     best_run = sweep.best_runs(metric, maximize, 1)[0]
                     value = 0
                     if tune and hyperparam is not None: value = best_run.hyperparams[hyperparam]
+
+                    # run benchmark with visualization enabled
                     bench.reset().set_performance_mode(False).set_print_inverval(None)
                     opt = init_fn(opt_fn, bench, value)
                     bench.run(opt, max_passes=passes, max_seconds=sec, test_every_forwards=test_every, num_extra_passes=num_extra_passes)
+
+                    # make dirs and render to __TEMP__.mp4 to avoid saving partial renders
                     if not os.path.exists(self.summaries_root): os.mkdir(self.summaries_root)
                     if not os.path.exists(self.summary_dir): os.mkdir(self.summary_dir)
                     bench.render(f'{video_path} __TEMP__', scale=vid_scale, fps=fps, progress=False)
+
+                    # after a successful render, rename __TEMP__.mp4 to the actual path
                     os.rename(f'{video_path} __TEMP__.mp4', f'{video_path}.mp4')
 
+            # -------------------------------- print time -------------------------------- #
             if print_time:
-                if print_progress: print("                                                                                                  ", end="\r")
-                print(f"{task_name} took {(time.time() - start):.2f} s.")
+                if print_progress: print(" " * 1000, end="\r")
+                s = f"{task_name} took {(time.time() - start):.2f} s."
+                if test_time != 0: s = f"{s}; test epochs took {float(test_time):.2f} s."
+                print(s)
 
         self.run_bench = run_bench
 
diff --git a/visualbench/runs/mlbench.py b/visualbench/runs/mlbench.py
@@ -3,6 +3,7 @@
 from collections.abc import Callable, Iterable, Mapping, Sequence
 from typing import TYPE_CHECKING, Any
 
+import rtdl_revisiting_models
 import torch
 from monai.losses.dice import DiceFocalLoss
 from torch import nn
@@ -70,7 +71,7 @@ def run_ml(self):
         # # ndim = 132,611
         # # 22s. ~ 7m. 20s.
         # # 9+3=12 ~ 4m. 20s.
-        # bench = tasks.WavePINN(tasks.WavePINN.FLS(2, 1, hidden_size=256, n_hidden=3)).to(CUDA_IF_AVAILABLE)
+        # bench = tasks.WavePINN(tasks.WavePINN.FLS(2, 1, hidden_size=256, n_hidden=3), n_pde=512, n_ic=256, n_bc=256).to(CUDA_IF_AVAILABLE)
         # self.run_bench(bench, 'ML - Wave PDE - FLS', passes=10_000, sec=600, metrics='train loss', vid_scale=4)
 
     def run_mls(self):
@@ -81,7 +82,7 @@ def run_mls(self):
         # 5s. ~ 1m. 40s.
         bench = tasks.Collinear(models.MLP([32, 10]), batch_size=1).to(CUDA_IF_AVAILABLE)
         bench_name = 'MLS - Ill-conditioned logistic regression BS-1'
-        self.run_bench(bench, bench_name, passes=10_000, sec=600, test_every=50, metrics='test loss', vid_scale=None)
+        self.run_bench(bench, bench_name, passes=20_000, sec=1_000, test_every=50, metrics='test loss', vid_scale=None)
 
         # --------------------------- Matrix factorization --------------------------- #
         # ...
@@ -90,32 +91,46 @@ def run_mls(self):
             path = "MovieLens-100k/ml-100k"
             if not os.path.exists(path):
                 path = load_movie_lens()
-        bench = tasks.MFMovieLens(path, batch_size=32, device='cuda').cuda()
+        bench = tasks.MFMovieLens(path, batch_size=32, device='cuda').to(CUDA_IF_AVAILABLE)
         bench_name = 'MLS - MovieLens BS-32 - Matrix Factorization'
-        self.run_bench(bench, bench_name, passes=10_000, sec=600, test_every=50, metrics='test loss', vid_scale=None)
+        self.run_bench(bench, bench_name, passes=20_000, sec=1_000, test_every=50, metrics='test loss', vid_scale=None)
 
         # ------------------------------ MLP (Colinear) ------------------------------ #
         model = models.MLP([32, 64, 96, 128, 256, 10])
-        bench = tasks.Collinear(model, batch_size=64, test_batch_size=4096).cuda()
+        bench = tasks.Collinear(model, batch_size=64, test_batch_size=4096).to(CUDA_IF_AVAILABLE)
         bench_name = 'MLS - Colinear BS-64 - MLP(32-64-96-128-256-10)'
-        self.run_bench(bench, bench_name, passes=10_000, sec=600, test_every=100, metrics='test loss', vid_scale=None)
+        self.run_bench(bench, bench_name, passes=20_000, sec=1_000, test_every=100, metrics='test loss', vid_scale=None)
 
         # ------------------------------- RNN (MNIST-1D) ------------------------------ #
         # ndim = 20,410
         # 11s. ~ 3m. 30s.
-        bench = tasks.datasets.Mnist1d(
+        bench = tasks.Mnist1d(
             models.RNN(1, 10, hidden_size=40, num_layers=2, rnn=torch.nn.RNN),
             batch_size=128,
         ).to(CUDA_IF_AVAILABLE)
-        bench_name = 'MLS - MNIST-1D BS-128 - RNN(2x40)'
-        self.run_bench(bench, bench_name, passes=10_000, sec=600, test_every=20, metrics='test loss', vid_scale=None, binary_mul=0.5)
+        bench_name = 'MLS - Mnist1d-5_000 BS-128 - RNN(2x40)'
+        self.run_bench(bench, bench_name, passes=20_000, sec=1_000, test_every=20, metrics='test loss', vid_scale=None)
+
+        # ------------------------- FTTransformer (MNIST-1D) ------------------------- #
+        class NoCat(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.model = rtdl_revisiting_models.FTTransformer(n_cont_features=40, cat_cardinalities=[], d_out=10,
+                    **rtdl_revisiting_models.FTTransformer.get_default_kwargs(1))
+
+            def forward(self, x):
+                return self.model.forward(x, None)
+
+        bench = tasks.Mnist1d(NoCat(), batch_size=32, test_batch_size=1024, num_samples=20_000).to(CUDA_IF_AVAILABLE)
+        bench_name = 'MLS - Mnist1d-20_000 BS-32 - FTTransformer'
+        self.run_bench(bench, bench_name, passes=20_000, sec=1_000, test_every=200, metrics='test loss', vid_scale=None)
 
         # ---------------------------- ConvNet (MNIST-1D) ---------------------------- #
         # ndim = 134,410
-        bench = tasks.datasets.Mnist1d(
+        bench = tasks.Mnist1d(
             models.vision.ConvNet(40, 1, 10, widths=(64, 128, 256), dropout=0.7),
             batch_size=32, test_batch_size=256
         ).to(CUDA_IF_AVAILABLE)
-        bench_name = "MLS - MNIST-1D BS-32 - ConvNet"
-        self.run_bench(bench, bench_name, passes=20_000, sec=1000, test_every=50, metrics = "test loss", vid_scale=None)
+        bench_name = "MLS - Mnist1d-5_000 BS-32 - ConvNet"
+        self.run_bench(bench, bench_name, passes=20_000, sec=1_000, test_every=50, metrics = "test loss", vid_scale=None)
 
diff --git a/visualbench/tasks/__init__.py b/visualbench/tasks/__init__.py
@@ -24,6 +24,7 @@
     test_functions,
     TEST_FUNCTIONS,
 )
+from .tammes import Tammes
 from .glimmer import Glimmer
 from .gmm import GaussianMixtureNLL
 from .graph_layout import GraphLayout
diff --git a/visualbench/tasks/tammes.py b/visualbench/tasks/tammes.py
@@ -0,0 +1,106 @@
+import math
+
+import cv2
+import numpy as np
+import torch
+from torch import nn
+from ..benchmark import Benchmark
+
+class Tammes(Benchmark):
+    """The Tammes problem: place points on a sphere so that the minimal pairwise distance is maximized.
+
+    Points are parameterized by spherical coordinates (theta, phi).
+
+    Renders:
+        points.
+
+    Args:
+        num_points (int): The number of points (N) on the sphere.
+        initial_dist_epsilon (float): Margin that keeps the initial polar angles away from the
+                                        poles; thetas are sampled from [epsilon, pi - epsilon).
+    """
+    def __init__(self, num_points: int, initial_dist_epsilon: float = 1e-3, resolution=256, p=2, draw_lines=None):
+        super().__init__()
+        if num_points < 2:
+            raise ValueError("Number of points must be at least 2.")
+        self.num_points = num_points
+
+        self.p=p
+        initial_thetas = torch.rand(num_points) * (math.pi - 2 * initial_dist_epsilon) + initial_dist_epsilon
+        initial_phis = torch.rand(num_points) * (2 * math.pi)
+
+        self.thetas = nn.Parameter(initial_thetas)
+        self.phis = nn.Parameter(initial_phis)
+
+        self.eps = 1e-12
+
+        if draw_lines is None: draw_lines = num_points < 12
+        self.draw_lines = draw_lines
+        self.resolution = resolution
+
+    def spherical_to_cartesian(self, thetas: torch.Tensor, phis: torch.Tensor) -> torch.Tensor:
+        """Converts spherical coordinates (unit radius) to Cartesian coordinates."""
+        x = torch.sin(thetas) * torch.cos(phis)
+        y = torch.sin(thetas) * torch.sin(phis)
+        z = torch.cos(thetas)
+        # (num_points, 3)
+        coords = torch.stack([x, y, z], dim=1)
+        return coords
+
+    @torch.no_grad
+    def _make_frame(
+        self,
+        coords: torch.Tensor,
+        img_size: int = 512,
+        point_radius: int = 5,
+        draw_lines: bool = False,
+        line_thickness: int = 1,
+        line_color: tuple[int, int, int] = (70, 70, 70) # Faint grey BGR
+        ) -> np.ndarray:
+        frame = np.zeros((img_size, img_size, 3), dtype=np.uint8)
+        cv2.circle(frame, (img_size // 2, img_size // 2), img_size // 2 - 1, (50, 50, 50), 1, cv2.LINE_AA) # pylint:disable=no-member
+
+        coords_np = coords.detach().cpu().numpy()
+
+        # Project onto xy plane and scale to image coordinates
+        # x, y are in [-1, 1], map to [0, img_size]
+        img_coords = []
+        for i in range(self.num_points):
+            x, y, z = coords_np[i]
+            # Scale x, y from [-1, 1] to [0, img_size]
+            img_x = int((x + 1.0) / 2.0 * img_size)
+            img_y = int((y + 1.0) / 2.0 * img_size)
+            img_coords.append(((img_x, img_y), z))
+
+        # Draw lines first so points are drawn on top
+        if draw_lines:
+            for i in range(self.num_points):
+                pt1, _ = img_coords[i]
+                for j in range(i + 1, self.num_points):
+                    pt2, _ = img_coords[j]
+                    cv2.line(frame, pt1, pt2, line_color, line_thickness, cv2.LINE_AA) # pylint:disable=no-member
+
+        # Points (circles)
+        for i in range(self.num_points):
+            (img_x, img_y), z = img_coords[i]
+
+            # Color/brightness to indicate depth (z coordinate)
+            intensity = int((z + 1.0) / 2.0 * 200) + 55 # Map z=[-1,1] to brightness [55, 255]
+            color = (intensity // 2, intensity // 2, intensity) # BGR, bias towards blue/white
+
+            cv2.circle(frame, (img_x, img_y), point_radius, color, -1, cv2.LINE_AA) # filled circle # pylint:disable=no-member
+
+        return frame
+
+    def get_loss(self) -> torch.Tensor:
+        cartesian = self.spherical_to_cartesian(self.thetas, self.phis)
+        pdists = torch.cdist(cartesian, cartesian, p=self.p)
+        pdists = pdists + torch.eye(pdists.size(0), device=pdists.device, dtype=pdists.dtype) * pdists.amax().detach() * 2
+
+        loss = 1 / pdists.amin()
+
+        if self._make_images:
+            frame = self._make_frame(cartesian, img_size=self.resolution, draw_lines=self.draw_lines)
+            self.log_image('solution', frame, to_uint8=False, show_best=True)
+
+        return loss

Original file line number	Diff line number	Diff line change
`@@ -24,6 +24,7 @@`
`24`	`24`	`test_functions,`
`25`	`25`	`TEST_FUNCTIONS,`
`26`	`26`	`)`
	`27`	`+from .tammes import Tammes`
`27`	`28`	`from .glimmer import Glimmer`
`28`	`29`	`from .gmm import GaussianMixtureNLL`
`29`	`30`	`from .graph_layout import GraphLayout`