Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file modified common_docs/Privacy_Loss_Distributions.pdf
Binary file not shown.
3 changes: 2 additions & 1 deletion python/dp_accounting/dp_accounting/pld/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ py_library(
":privacy_loss_distribution",
"//dp_accounting:dp_event",
"//dp_accounting:privacy_accountant",
"@accounting_py_pip_deps//numpy",
],
)

Expand All @@ -59,6 +60,7 @@ py_test(
"//dp_accounting:privacy_accountant",
"//dp_accounting:privacy_accountant_test",
"@accounting_py_pip_deps//absl_py",
"@accounting_py_pip_deps//numpy",
],
)

Expand Down Expand Up @@ -132,7 +134,6 @@ py_library(

py_test(
name = "privacy_loss_distribution_test",
size = "small",
srcs = ["privacy_loss_distribution_test.py"],
python_version = "PY3",
deps = [
Expand Down
86 changes: 85 additions & 1 deletion python/dp_accounting/dp_accounting/pld/pld_privacy_accountant.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,17 @@
"""Privacy accountant that uses Privacy Loss Distributions."""

import math
from typing import Optional
import numbers
from typing import Optional, Union

import numpy as np

from dp_accounting import dp_event
from dp_accounting import privacy_accountant
from dp_accounting.pld import common
from dp_accounting.pld import privacy_loss_distribution


NeighborRel = privacy_accountant.NeighboringRelation
CompositionErrorDetails = (
privacy_accountant.PrivacyAccountant.CompositionErrorDetails
Expand Down Expand Up @@ -277,3 +281,83 @@ def get_delta(self, target_epsilon: float) -> float:
if self._contains_non_dp_event:
return 1
return self._pld.get_delta_for_epsilon(target_epsilon) # pytype: disable=bad-return-type

def get_true_positive_rates(
    self,
    false_positive_rates: Union[float, np.ndarray],
    deltas: Optional[np.ndarray] = None,
) -> Union[float, np.ndarray]:
  """Computes an upper bound on the true positive rate (TPR).

  In particular, each (epsilon, delta) pair implied by the PLD also implies an
  upper bound on the TPR for a given false positive rate (FPR). This function
  computes this upper bound for a range of deltas (either user-specified, or a
  default range) and then returns the minimum TPR across all deltas. See
  Section 3.1 of the supplementary material for details.

  Note that this implementation reports a TPR-FPR curve which is symmetric
  with respect to the line y=1-x, which is not true for the true TPR-FPR curve
  for asymmetric mechanisms (e.g., subsampled Gaussian under add-remove). In
  this case the curve is still a valid upper bound on the true TPR-FPR curve,
  but perhaps overly pessimistic.

  Args:
    false_positive_rates: the FPR or list of FPRs at which to compute the TPR.
    deltas: the list of deltas to use for the computation. If None, the
      default deltas `np.logspace(np.log10(1e-13), np.log10(1), num=3000)` and
      0 will be used. A denser and wider range of deltas will yield a more
      accurate estimate, at the cost of increased run-time.

  Returns:
    A float or array of floats representing the upper bound on the TPR at the
    given FPR or list of FPRs. If a non-private event has been composed, the
    bound is trivially 1 at every FPR.
  """
  if self._contains_non_dp_event:
    if isinstance(false_positive_rates, numbers.Number):
      return 1.0
    # Force a float dtype: `ones_like` would otherwise inherit an integer
    # dtype from integer-valued FPR input, although the documented return
    # type is an array of floats.
    return np.ones_like(false_positive_rates, dtype=float)
  return self._pld.get_true_positive_rates(false_positive_rates, deltas)

def get_gdp_parameter_estimate(
    self,
    false_positive_rates: Optional[np.ndarray] = None,
    deltas: Optional[np.ndarray] = None,
) -> float:
  """Estimates the mu-GDP parameter implied by the composed events.

  The TPR is upper bounded on a grid of FPRs, and the smallest mu whose
  mu-GDP trade-off curve dominates all of those TPR upper bounds is returned.
  Using upper bounds on the TPR makes the estimate pessimistic, while
  evaluating only a finite grid of FPRs makes it optimistic, since the grid
  is not guaranteed to contain the point at which the true mu is tight. See
  Section 3.2 of the supplementary material for details.

  If the privacy loss is infinite with probability greater than
  min(false_positive_rates), infinity is returned. This way a PLD with a
  large infinity mass correctly reports infinite mu-GDP, while the small
  infinity masses introduced by truncating a PLD to finite support do not.
  We recommend reporting the minimum FPR used alongside any mu-GDP value
  computed with this function.

  Args:
    false_positive_rates: The list of FPRs to use for the computation. If
      None, the default FPRs `np.logspace(np.log10(1e-12), np.log10(0.5),
      num=500)` will be used. A denser and wider range of FPRs will reduce
      optimism of the estimate, at the cost of increased run-time.
    deltas: The list of deltas to use for the computation. If None, the
      default deltas `np.logspace(np.log10(1e-13), np.log10(1), num=3000)` and
      0 will be used. A denser and wider range of deltas will reduce pessimism
      of the estimate, at the cost of increased run-time.

  Returns:
    The estimated mu-GDP parameter, or infinity if a non-private event has
    been composed. The estimate is not guaranteed to be an upper or lower
    bound on the true mu-GDP parameter, but can be made arbitrarily close to
    it by increasing the precision of the PLD, deltas, and FPRs.
  """
  if not self._contains_non_dp_event:
    return self._pld.get_gdp_parameter_estimate(false_positive_rates, deltas)
  # A non-private event admits no finite GDP guarantee.
  return math.inf
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

from absl.testing import absltest
from absl.testing import parameterized
import numpy as np

from dp_accounting import dp_event
from dp_accounting import privacy_accountant
Expand Down Expand Up @@ -173,6 +174,8 @@ def test_additive_noise_mechanisms_with_zero_noise_multiplier(self, event):
accountant.compose(event)
self.assertEqual(accountant.get_delta(1.0), 1)
self.assertEqual(accountant.get_epsilon(0.01), math.inf)
self.assertEqual(accountant.get_true_positive_rates(0.1), 1)
self.assertEqual(accountant.get_gdp_parameter_estimate(), math.inf)

@parameterized.parameters(
dp_event.PoissonSampledDpEvent(0, dp_event.GaussianDpEvent(1)),
Expand All @@ -187,6 +190,30 @@ def test_poisson_subsampling_with_zero_probability(self, event):
accountant.compose(event)
self.assertEqual(accountant.get_delta(0), 0)
self.assertEqual(accountant.get_epsilon(0), 0)
self.assertEqual(accountant.get_true_positive_rates(0.5), 0.5)
self.assertAlmostEqual(accountant.get_gdp_parameter_estimate(), 0)

def test_epsilon_basic(self):
  """Composes two (epsilon, 0)-DP events and checks all accountant queries."""
  accountant = pld_privacy_accountant.PLDAccountant()
  for event_epsilon in (1.0, 2.0):
    accountant.compose(
        dp_event.EpsilonDeltaDpEvent(epsilon=event_epsilon, delta=0.0)
    )

  # Epsilons of 1 and 2 compose to exactly 3 at delta 0.
  self.assertEqual(accountant.get_delta(3.0), 0.0)
  self.assertEqual(accountant.get_epsilon(0.0), 3.0)

  tpr_at_0_1_fpr = accountant.get_true_positive_rates(0.1)
  self.assertAlmostEqual(tpr_at_0_1_fpr, 0.8286, delta=1e-3)

  # Use a dense, custom FPR grid rather than the default one for the mu-GDP
  # estimate.
  fpr_grid = np.logspace(np.log10(0.005), np.log10(0.5), num=20_000)
  mu_estimate = accountant.get_gdp_parameter_estimate(
      false_positive_rates=fpr_grid
  )
  self.assertAlmostEqual(mu_estimate, 2.358, delta=1e-3)

def test_epsilon_delta_basic(self):
event1 = dp_event.EpsilonDeltaDpEvent(epsilon=1.0, delta=0.1)
Expand All @@ -196,6 +223,10 @@ def test_epsilon_delta_basic(self):
accountant.compose(event2)
self.assertEqual(accountant.get_delta(3.0), 0.28)
self.assertEqual(accountant.get_epsilon(0.28), 3.0)
self.assertAlmostEqual(
accountant.get_true_positive_rates(0.1), 0.91938, delta=1e-3
)
self.assertEqual(accountant.get_gdp_parameter_estimate(), math.inf)

def test_exponential_mechanism_basic(self):
event1 = dp_event.ExponentialMechanismDpEvent(1.0)
Expand All @@ -214,10 +245,20 @@ def test_gaussian_basic(self):

exact_epsilon = 1
exact_delta = 0.126936
exact_tpr_at_0_1_fpr = 0.38914
exact_mu = 1
self.assertAlmostEqual(
accountant.get_delta(exact_epsilon), exact_delta, delta=1e-3)
self.assertAlmostEqual(
accountant.get_epsilon(exact_delta), exact_epsilon, delta=1e-3)
self.assertAlmostEqual(
accountant.get_true_positive_rates(0.1),
exact_tpr_at_0_1_fpr,
delta=1e-3,
)
self.assertAlmostEqual(
accountant.get_gdp_parameter_estimate(), exact_mu, delta=1e-3
)

def test_poisson_subsampled_gaussian(self):
subsampled_gaussian_event = dp_event.PoissonSampledDpEvent(
Expand Down Expand Up @@ -343,6 +384,15 @@ def test_truncated_subsampled_gaussian_basic(self):
accountant.get_epsilon(expected_delta), expected_epsilon, delta=1e-6
)

def test_contains_non_dp_event(self):
  """Checks every query's reported value after a non-private event."""
  non_private_accountant = pld_privacy_accountant.PLDAccountant()
  non_private_accountant.compose(dp_event.NonPrivateDpEvent())

  self.assertTrue(non_private_accountant._contains_non_dp_event)
  # Expected: delta is 1, epsilon and mu-GDP are infinite, TPR bound is 1.
  self.assertEqual(non_private_accountant.get_delta(1.0), 1)
  self.assertEqual(non_private_accountant.get_epsilon(0.01), math.inf)
  self.assertEqual(non_private_accountant.get_true_positive_rates(0.1), 1)
  self.assertEqual(
      non_private_accountant.get_gdp_parameter_estimate(), math.inf
  )


if __name__ == '__main__':
absltest.main()
121 changes: 119 additions & 2 deletions python/dp_accounting/dp_accounting/pld/privacy_loss_distribution.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import collections
import logging
import math
import numbers
from typing import Any, Callable, Mapping, Optional, Sequence, Tuple, Union

import numpy as np
Expand Down Expand Up @@ -325,6 +326,122 @@ def compute_mixture(
)
# pylint:enable=protected-access

def get_true_positive_rates(
    self,
    false_positive_rates: Union[float, np.ndarray],
    deltas: Optional[np.ndarray] = None,
) -> Union[float, np.ndarray]:
  """Computes an upper bound on the true positive rate (TPR).

  In particular, each (epsilon, delta) pair implied by the PLD also implies an
  upper bound on the TPR for a given false positive rate (FPR). This function
  computes this upper bound for a range of deltas (either user-specified, or a
  default range) and then returns the minimum TPR across all deltas. See
  Section 3.1 of the supplementary material for details.

  Note that this implementation reports a TPR-FPR curve which is symmetric
  with respect to the line y=1-x, which is not true for the true TPR-FPR curve
  for asymmetric mechanisms (e.g., subsampled Gaussian under add-remove). In
  this case the curve is still a valid upper bound on the true TPR-FPR curve,
  but perhaps overly pessimistic.

  Args:
    false_positive_rates: the FPR or list of FPRs at which to compute the TPR.
    deltas: the list of deltas to use for the computation. If None, the
      default deltas `np.logspace(np.log10(1e-13), np.log10(1), num=3000)` and
      0 will be used. A denser and wider range of deltas will yield a more
      accurate estimate, at the cost of increased run-time.

  Returns:
    A float or array of floats representing the upper bound on the TPR at the
    given FPR or list of FPRs.
  """
  is_scalar = isinstance(false_positive_rates, numbers.Number)
  # Always work on a float array so that integer or list input cannot leak an
  # integer dtype into the result.
  fprs = (
      np.array([false_positive_rates], dtype=float)
      if is_scalar
      else np.asarray(false_positive_rates, dtype=float)
  )
  true_positive_rates = np.ones_like(fprs)
  if deltas is None:
    deltas = np.concatenate(
        ([0.0], np.logspace(np.log10(1e-13), 0.0, num=3000))
    )
  for delta in deltas:
    epsilon = self.get_epsilon_for_delta(delta)
    if np.isinf(epsilon):
      # No finite epsilon holds at this delta, so the pair gives no
      # constraint. Evaluating the formulas anyway would compute
      # exp(inf) * 0 = NaN at FPR = 0, and `np.minimum` would propagate that
      # NaN into the final result.
      continue
    # (epsilon, delta)-DP implies TPR <= exp(epsilon) * FPR + delta, and, by
    # applying the same inequality to the complementary events,
    # TPR <= 1 - exp(-epsilon) * (1 - delta - FPR).
    bound1 = np.exp(epsilon) * fprs + delta
    bound2 = 1.0 + (fprs + delta - 1.0) * np.exp(-epsilon)
    true_positive_rates = np.minimum(
        true_positive_rates, np.minimum(bound1, bound2)
    )
  if is_scalar:
    return true_positive_rates[0]
  return true_positive_rates

def get_gdp_parameter_estimate(
    self,
    false_positive_rates: Optional[np.ndarray] = None,
    deltas: Optional[np.ndarray] = None,
) -> float:
  """Computes an estimate of the mu-GDP parameter implied by the PLD.

  Specifically, we upper bound the true positive rate (TPR) at a given range
  of false positive rates (FPRs), and then find the minimum mu-GDP value that
  upper bounds all of the TPR upper bounds. This is pessimistic in that we are
  using upper bounds on the TPRs, but optimistic in that we are using a finite
  grid of FPRs, which is not guaranteed to contain the point at which the true
  mu-GDP parameter is tight. See Section 3.2 of the supplementary material for
  details.

  If the privacy loss is infinite with probability greater than
  min(false_positive_rates), then this function will return infinity.
  This is so that (i) when a PLD has large infinity mass, we correctly report
  infinite mu-GDP, but simultaneously (ii) the small infinity masses
  introduced by truncating a PLD to finite support do not result in infinite
  mu-GDP. We recommend reporting the minimum FPR used when reporting mu-GDP
  values computed using this function.

  Args:
    false_positive_rates: The list of FPRs to use for the computation. If
      None, the default FPRs `np.logspace(np.log10(1e-12), np.log10(0.5),
      num=500)` will be used. A denser and wider range of FPRs will reduce
      optimism of the estimate, at the cost of increased run-time. Values
      above 0.5 are ignored.
    deltas: The list of deltas to use for the computation. If None, the
      default deltas `np.logspace(np.log10(1e-13), np.log10(1), num=3000)` and
      0 will be used. A denser and wider range of deltas will reduce pessimism
      of the estimate, at the cost of increased run-time.

  Returns:
    The estimated mu-GDP parameter. Note that this is not guaranteed to be an
    upper or lower bound on the true mu-GDP parameter (but can be made
    arbitrarily close to the true mu-GDP parameter by increasing the precision
    of the PLD, deltas, and FPRs).

  Raises:
    ValueError: If `false_positive_rates` is given but contains no value less
      than or equal to 0.5.
  """
  if false_positive_rates is None:
    false_positive_rates = np.logspace(
        np.log10(1e-12), np.log10(0.5), num=500
    )
  else:
    # Accept any array-like (e.g. a plain Python list), as the docstring
    # promises; boolean-mask indexing below requires an ndarray.
    false_positive_rates = np.atleast_1d(
        np.asarray(false_positive_rates, dtype=float)
    )
    # Since the current implementation of `get_true_positive_rates` is
    # symmetric with respect to the line y=1-x, we only need to consider FPR
    # values up to 1/2.
    false_positive_rates = false_positive_rates[false_positive_rates <= 0.5]
    if false_positive_rates.size == 0:
      raise ValueError(
          'false_positive_rates must contain at least one value <= 0.5.'
      )

  min_false_positive_rate = false_positive_rates.min()
  # pylint:disable=protected-access
  if self._pmf_remove._infinity_mass > min_false_positive_rate:
    return np.inf
  if (
      not self._symmetric
      and self._pmf_add._infinity_mass > min_false_positive_rate
  ):
    return np.inf
  # pylint:enable=protected-access

  true_positive_rates = self.get_true_positive_rates(
      false_positive_rates, deltas
  )
  # Under mu-GDP, TPR <= Phi(Phi^{-1}(FPR) + mu); hence the smallest valid mu
  # is the largest gap between the probit-transformed TPR and FPR.
  return np.max(
      stats.norm.ppf(true_positive_rates)
      - stats.norm.ppf(false_positive_rates)
  )


def identity(
value_discretization_interval: float = 1e-4,
Expand Down Expand Up @@ -1135,7 +1252,7 @@ def from_truncated_subsampled_gaussian_mechanism(
This method builds on from_gaussian_mechanism and hence supports the same
algorithms for constructing the privacy loss distribution as
from_gaussian_mechanism. See Sections 2.1 and 2.2 of supplementary material
for more details. See Section 4.3 of the supplementary material for more
for more details. See Section 5.3 of the supplementary material for more
details on the computation of the privacy loss distribution.

Args:
Expand Down Expand Up @@ -1211,7 +1328,7 @@ def from_privacy_parameters(
"""Constructs pessimistic PLD from epsilon and delta parameters.

When the mechanism is (epsilon, delta)-differentially private, the following
is a pessimistic estimate of its privacy loss distribution (see Section 3.5
is a pessimistic estimate of its privacy loss distribution (see Section 4.5
of the supplementary material for more explanation):
- infinity with probability delta.
- epsilon with probability (1 - delta) / (1 + exp(-eps))
Expand Down
Loading
Loading