astrofrog · prajwel · Jul 10, 2025 · Jul 10, 2025 · Jul 10, 2025 · Jul 10, 2025
diff --git a/README.rst b/README.rst
@@ -3,7 +3,7 @@
 About
 -----
 
-Sometimes you just want to compute simple 1D or 2D histograms with regular bins. Fast. No
+Sometimes you just want to compute simple 1D, 2D, or multidimensional histograms with regular bins. Fast. No
 nonsense. `Numpy's <http://www.numpy.org>`__ histogram functions are
 versatile, and can handle for example non-regular binning, but this
 versatility comes at the expense of performance.
@@ -13,8 +13,9 @@ histogram functions for regular bins that don't compromise on performance. It do
 anything complicated - it just implements a simple histogram algorithm
 in C and keeps it simple. The aim is to have functions that are fast but
 also robust and reliable. The result is a 1D histogram function here that
-is **7-15x faster** than ``numpy.histogram``, and a 2D histogram function
-that is **20-25x faster** than ``numpy.histogram2d``.
+is **2-15x faster** than ``numpy.histogram``, a 2D histogram function
+that is **10-12x faster** than ``numpy.histogram2d``, and a multidimensional
+histogram function that is **5-10x faster** than ``numpy.histogramdd``.
 
 To install::
 
@@ -24,12 +25,12 @@ or if you use conda you can instead do::
 
     conda install -c conda-forge fast-histogram
 
-The ``fast_histogram`` module then provides two functions:
-``histogram1d`` and ``histogram2d``:
+The ``fast_histogram`` module then provides three functions:
+``histogram1d``, ``histogram2d``, and ``histogramdd``:
 
 .. code:: python
 
-    from fast_histogram import histogram1d, histogram2d
+    from fast_histogram import histogram1d, histogram2d, histogramdd
 
 Example
 -------
@@ -46,24 +47,26 @@ histogram:
     In [3]: y = np.random.random(10_000_000)
 
     In [4]: %timeit _ = np.histogram2d(x, y, range=[[-1, 2], [-2, 4]], bins=30)
-    935 ms ± 58.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
+    562 ms ± 5.83 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
 
     In [5]: from fast_histogram import histogram2d
 
     In [6]: %timeit _ = histogram2d(x, y, range=[[-1, 2], [-2, 4]], bins=30)
-    40.2 ms ± 624 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
+    55.9 ms ± 583 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
 
 (note that ``10_000_000`` is possible in Python 3.6 syntax, use ``10000000`` instead in previous versions)
 
-The version here is over 20 times faster! The following plot shows the
+The version here is over 10 times faster! The following plot shows the
 speedup as a function of array size for the bin parameters shown above:
 
 .. figure:: https://github.com/astrofrog/fast-histogram/raw/main/speedup_compared.png
    :alt: Comparison of performance between Numpy and fast-histogram
 
-as well as results for the 1D case, also with 30 bins. The speedup for
-the 2D case is consistently between 20-25x, and for the 1D case goes
-from 15x for small arrays to around 7x for large arrays.
+as well as results for the 1D and 3D cases, also with 30 bins. The speedup for
+the 2D case is consistently between 10x and 12x, and for the 1D case goes
+from 15x for small arrays to around 2x for large arrays.
+The ``histogramdd`` function was benchmarked on three-dimensional data, where the
+speedup is between 5x and 10x.
 
 Q&A
 ---

diff --git a/comparison/benchmark.py b/comparison/benchmark.py
@@ -25,6 +25,18 @@
 NUMPY_2D_STMT = "np_histogram2d(x, y, range=[[-1, 2], [-2, 4]], bins=30)"
 FAST_2D_STMT = "histogram2d(x, y, range=[[-1, 2], [-2, 4]], bins=30)"
 
+SETUP_3D = """
+import numpy as np
+from numpy import histogramdd as np_histogramdd
+from fast_histogram import histogramdd
+x = np.random.random({size})
+y = np.random.random({size})
+z = np.random.random({size})
+"""
+
+NUMPY_3D_STMT = "np_histogramdd(np.column_stack([x, y, z]), range=[[-1, 2], [-2, 4], [-2, 4]], bins=30)"
+FAST_3D_STMT = "histogramdd(np.column_stack([x, y, z]), range=[[-1, 2], [-2, 4], [-2, 4]], bins=30)"
+
 # How long each benchmark should aim to take
 TARGET_TIME = 1.0
 
@@ -44,8 +56,8 @@ def time_stats(stmt=None, setup=None):
     return np.min(times) / number, np.mean(times) / number, np.median(times) / number
 
 
-FMT_HEADER = "# {:7s}" + " {:10s}" * 12 + "\n"
-FMT = "{:9d}" + " {:10.7e}" * 12 + "\n"
+FMT_HEADER = "# {:7s}" + " {:10s}" * 18 + "\n"
+FMT = "{:9d}" + " {:10.7e}" * 18 + "\n"
 
 with open("benchmark_times.txt", "w") as f:
     f.write(
@@ -63,6 +75,12 @@ def time_stats(stmt=None, setup=None):
             "fa_2d_min",
             "fa_2d_mean",
             "fa_2d_median",
+            "np_3d_min",
+            "np_3d_mean",
+            "np_3d_median",
+            "fa_3d_min",
+            "fa_3d_mean",
+            "fa_3d_median",
         )
     )
 
@@ -83,6 +101,12 @@ def time_stats(stmt=None, setup=None):
         fa_2d_min, fa_2d_mean, fa_2d_median = time_stats(
             stmt=FAST_2D_STMT, setup=SETUP_2D.format(size=size)
         )
+        np_3d_min, np_3d_mean, np_3d_median = time_stats(
+            stmt=NUMPY_3D_STMT, setup=SETUP_3D.format(size=size)
+        )
+        fa_3d_min, fa_3d_mean, fa_3d_median = time_stats(
+            stmt=FAST_3D_STMT, setup=SETUP_3D.format(size=size)
+        )
 
         f.write(
             FMT.format(
@@ -99,6 +123,12 @@ def time_stats(stmt=None, setup=None):
                 fa_2d_min,
                 fa_2d_mean,
                 fa_2d_median,
+                np_3d_min,
+                np_3d_mean,
+                np_3d_median,
+                fa_3d_min,
+                fa_3d_mean,
+                fa_3d_median,
             )
         )
         f.flush()
diff --git a/comparison/plot.py b/comparison/plot.py
@@ -17,17 +17,24 @@
     fa_2d_min,
     fa_2d_mean,
     fa_2d_median,
+    np_3d_min,
+    np_3d_mean,
+    np_3d_median,
+    fa_3d_min,
+    fa_3d_mean,
+    fa_3d_median,
 ) = np.loadtxt("benchmark_times.txt", unpack=True)
 
 fig = plt.figure()
 ax = fig.add_subplot(1, 1, 1)
-ax.plot(size, np_1d_min / fa_1d_min, color=(34 / 255, 122 / 255, 181 / 255), label="1D")
-ax.plot(size, np_2d_min / fa_2d_min, color=(255 / 255, 133 / 255, 25 / 255), label="2D")
+ax.plot(size, np_1d_min / fa_1d_min, label="1D")
+ax.plot(size, np_2d_min / fa_2d_min, label="2D")
+ax.plot(size, np_3d_min / fa_3d_min, label="DD (3D)")
 ax.set_xscale("log")
 ax.set_xlim(0.3, 3e8)
-ax.set_ylim(1, 35)
+ax.set_ylim(1, 20)
 ax.grid()
 ax.set_xlabel("Array size")
-ax.set_ylabel("Speedup (fast-histogram / numpy)")
+ax.set_ylabel(f"Speedup (fast-histogram / numpy (version {np.__version__}))")
 ax.legend()
 fig.savefig("speedup_compared.png", bbox_inches="tight")
diff --git a/speedup_compared.png b/speedup_compared.png