From 5260380cec6355416ddb316c34ef0b64f5e96eec Mon Sep 17 00:00:00 2001 From: Chris Dibak Date: Fri, 9 Jan 2026 03:14:23 -0800 Subject: [PATCH] Add comment why we are not using std::geometric_distribution PiperOrigin-RevId: 854116436 --- cc/algorithms/BUILD | 1 + cc/algorithms/distributions.cc | 9 ++++++++- cc/algorithms/distributions.h | 6 ++++++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/cc/algorithms/BUILD b/cc/algorithms/BUILD index 4a056534..450020b1 100644 --- a/cc/algorithms/BUILD +++ b/cc/algorithms/BUILD @@ -430,6 +430,7 @@ cc_library( deps = [ ":rand", ":util", + "@com_google_absl//absl/log:check", "@com_google_absl//absl/memory", "@com_google_absl//absl/random", "@com_google_absl//absl/status", diff --git a/cc/algorithms/distributions.cc b/cc/algorithms/distributions.cc index ef42988b..8ec5c5a0 100644 --- a/cc/algorithms/distributions.cc +++ b/cc/algorithms/distributions.cc @@ -15,13 +15,20 @@ // #include "algorithms/distributions.h" +#include #include +#include #include +#include +#include +#include +#include "absl/log/check.h" #include "absl/memory/memory.h" #include "absl/random/random.h" #include "absl/status/status.h" #include "absl/status/statusor.h" +#include "absl/strings/str_cat.h" #include "absl/strings/string_view.h" #include "algorithms/rand.h" #include "algorithms/util.h" @@ -145,7 +152,7 @@ double GaussianDistribution::SampleGeometric() { // of n must be at least 10^6. This is to ensure an accurate approximation of a // Gaussian distribution. double GaussianDistribution::SampleBinomial(double sqrt_n) { - long long step_size = + int64_t step_size = static_cast(std::round(std::sqrt(2.0) * sqrt_n + 1)); SecureURBG& random = SecureURBG::GetInstance(); diff --git a/cc/algorithms/distributions.h b/cc/algorithms/distributions.h index d6b1fd5b..2180c2f4 100644 --- a/cc/algorithms/distributions.h +++ b/cc/algorithms/distributions.h @@ -87,6 +87,12 @@ class GaussianDistribution { // be positive. If the result would be higher than the maximum int64_t, returns // the maximum int64_t, which means that users should be careful around the // edges of their distribution. +// +// Note that we are not using std::geometric_distribution, as samples returned +// by that class are often not accurate enough for anonymization purposes. For +// instance, when sampling with very low p, std::geometric_distribution does not +// return odd values. In some cases, this could leak information about the +// parity of the input. class GeometricDistribution { public: // Builder for GeometricDistribution.