From 44431d7b9e55f91282e2891e41b7b33b512781c4 Mon Sep 17 00:00:00 2001 From: Alex Ojica Date: Thu, 25 Sep 2025 10:35:11 -0700 Subject: [PATCH] Refactor contribution sampling and combiners to work with multiple features. PiperOrigin-RevId: 811408440 --- .../pipelinedp4j/api/AggregationSpec.kt | 65 +-- .../pipelinedp4j/api/Query.kt | 91 ++-- .../pipelinedp4j/api/QueryPerGroupResult.kt | 41 +- .../pipelinedp4j/core/Combiners.kt | 316 ++++++++---- .../pipelinedp4j/core/ContributionSampler.kt | 41 -- .../pipelinedp4j/core/DpEngine.kt | 48 +- .../pipelinedp4j/core/DpFunctionsParams.kt | 298 ++++++----- .../core/PartitionAndPerPartitionSampler.kt | 29 +- .../pipelinedp4j/proto/accumulators.proto | 25 +- .../pipelinedp4j/proto/dpaggregates.proto | 18 +- .../pipelinedp4j/beam/BeamEncodersTest.kt | 11 +- .../pipelinedp4j/core/CompoundCombinerTest.kt | 261 +++++++--- .../pipelinedp4j/core/CountCombinerTest.kt | 59 ++- .../pipelinedp4j/core/DpEngineTest.kt | 264 +++++++--- .../core/DpFunctionsParamsTest.kt | 486 ++++++++++++------ .../pipelinedp4j/core/EndToEndTest.kt | 76 ++- .../core/ExactPrivacyIdCountCombinerTest.kt | 5 +- .../pipelinedp4j/core/MeanCombinerTest.kt | 249 ++++++--- .../pipelinedp4j/core/NoPrivacySamplerTest.kt | 29 +- .../PartitionAndPerPartitionSamplerTest.kt | 57 +- .../pipelinedp4j/core/PartitionSamplerTest.kt | 51 +- .../core/PartitionSamplerWithoutValuesTest.kt | 2 +- .../PerPartitionContributionsSamplerTest.kt | 68 ++- ...gregationPartitionSelectionCombinerTest.kt | 21 +- .../core/PrivacyIdCombinerTest.kt | 21 +- ...PrivatePartitionsComputationalGraphTest.kt | 29 +- .../core/PrivatePartitionsTest.kt | 2 +- .../PublicPartitionsComputationalGraphTest.kt | 90 +++- .../core/QuantilesCombinerTest.kt | 53 +- .../pipelinedp4j/core/SumCombinerTest.kt | 228 ++++++-- .../pipelinedp4j/core/VarianceCombinerTest.kt | 351 ++++++++++--- .../core/VectorSumCombinerTest.kt | 366 +++++++++---- .../pipelinedp4j/spark/SparkEncodersTest.kt | 11 +- 33 files changed, 2614 insertions(+), 1148 deletions(-) diff --git a/pipelinedp4j/main/com/google/privacy/differentialprivacy/pipelinedp4j/api/AggregationSpec.kt b/pipelinedp4j/main/com/google/privacy/differentialprivacy/pipelinedp4j/api/AggregationSpec.kt index 441b091f..6974f41e 100644 --- a/pipelinedp4j/main/com/google/privacy/differentialprivacy/pipelinedp4j/api/AggregationSpec.kt +++ b/pipelinedp4j/main/com/google/privacy/differentialprivacy/pipelinedp4j/api/AggregationSpec.kt @@ -148,58 +148,20 @@ internal fun AggregationSpec.getFeatureId(): String { } } -internal fun List.metrics(): List = buildList { - for (aggregation in this@metrics) { - when (aggregation) { - // Count and PrivacyIdCount do not aggregate any specific value, therefore they are handled - // differently. - is PrivacyIdCount -> - add( - MetricDefinition( - MetricType.PRIVACY_ID_COUNT, - aggregation.budget?.toInternalBudgetPerOpSpec(), - ) - ) - is Count -> - add(MetricDefinition(MetricType.COUNT, aggregation.budget?.toInternalBudgetPerOpSpec())) - is ValueAggregations<*> -> { - for (valueAggregationSpec in aggregation.valueAggregationSpecs) { - add( - MetricDefinition( - valueAggregationSpec.metricType, - valueAggregationSpec.budget?.toInternalBudgetPerOpSpec(), - ) - ) - } - } - is VectorAggregations<*> -> { - for (vectorAggregationSpec in aggregation.vectorAggregationSpecs) { - add( - MetricDefinition( - vectorAggregationSpec.metricType, - vectorAggregationSpec.budget?.toInternalBudgetPerOpSpec(), - ) - ) - } - } - } - } -} - internal fun List.outputColumnNamesWithMetricTypes(): List> = buildList { for (aggregation in this@outputColumnNamesWithMetricTypes) { when (aggregation) { - is PrivacyIdCount -> add(aggregation.outputColumnName to MetricType.PRIVACY_ID_COUNT) - is Count -> add(aggregation.outputColumnName to MetricType.COUNT) + is PrivacyIdCount -> add(Pair(aggregation.outputColumnName, MetricType.PRIVACY_ID_COUNT)) + is Count -> add(Pair(aggregation.outputColumnName, MetricType.COUNT)) is ValueAggregations<*> -> { for (valueAggregationSpec in aggregation.valueAggregationSpecs) { - add(valueAggregationSpec.outputColumnName to valueAggregationSpec.metricType) + add(Pair(valueAggregationSpec.outputColumnName, valueAggregationSpec.metricType)) } } is VectorAggregations<*> -> { for (vectorAggregationSpec in aggregation.vectorAggregationSpecs) { - add(vectorAggregationSpec.outputColumnName to vectorAggregationSpec.metricType) + add(Pair(vectorAggregationSpec.outputColumnName, vectorAggregationSpec.metricType)) } } } @@ -227,3 +189,22 @@ internal fun List.outputColumnNameToFeatureIdMap(): Map.outputColumnNames(): List = outputColumnNamesWithMetricTypes().map { it.first } + +internal fun AggregationSpec.toNonFeatureMetricDefinition(): MetricDefinition { + val (metricType, budget) = + when (this) { + is Count -> Pair(MetricType.COUNT, this.budget) + is PrivacyIdCount -> Pair(MetricType.PRIVACY_ID_COUNT, this.budget) + else -> + throw IllegalArgumentException("Unsupported AggregationSpec type for non feature metrics") + } + return MetricDefinition(metricType, budget?.toInternalBudgetPerOpSpec()) +} + +internal fun ValueAggregationSpec.toMetricDefinition(): MetricDefinition { + return MetricDefinition(this.metricType, this.budget?.toInternalBudgetPerOpSpec()) +} + +internal fun VectorAggregationSpec.toMetricDefinition(): MetricDefinition { + return MetricDefinition(this.metricType, this.budget?.toInternalBudgetPerOpSpec()) +} diff --git a/pipelinedp4j/main/com/google/privacy/differentialprivacy/pipelinedp4j/api/Query.kt b/pipelinedp4j/main/com/google/privacy/differentialprivacy/pipelinedp4j/api/Query.kt index 16dde3d5..6b954284 100644 --- a/pipelinedp4j/main/com/google/privacy/differentialprivacy/pipelinedp4j/api/Query.kt +++ b/pipelinedp4j/main/com/google/privacy/differentialprivacy/pipelinedp4j/api/Query.kt @@ -23,16 +23,16 @@ import com.google.privacy.differentialprivacy.pipelinedp4j.core.DpEngine import com.google.privacy.differentialprivacy.pipelinedp4j.core.DpEngineBudgetSpec import com.google.privacy.differentialprivacy.pipelinedp4j.core.Encoder import com.google.privacy.differentialprivacy.pipelinedp4j.core.EncoderFactory +import com.google.privacy.differentialprivacy.pipelinedp4j.core.FeatureSpec import com.google.privacy.differentialprivacy.pipelinedp4j.core.FeatureValuesExtractor import com.google.privacy.differentialprivacy.pipelinedp4j.core.FrameworkCollection import com.google.privacy.differentialprivacy.pipelinedp4j.core.FrameworkTable import com.google.privacy.differentialprivacy.pipelinedp4j.core.MetricType +import com.google.privacy.differentialprivacy.pipelinedp4j.core.ScalarFeatureSpec import com.google.privacy.differentialprivacy.pipelinedp4j.core.SelectPartitionsParams +import com.google.privacy.differentialprivacy.pipelinedp4j.core.VectorFeatureSpec import com.google.privacy.differentialprivacy.pipelinedp4j.proto.DpAggregates -import com.google.privacy.differentialprivacy.pipelinedp4j.proto.PerFeature import com.google.privacy.differentialprivacy.pipelinedp4j.proto.copy -import com.google.privacy.differentialprivacy.pipelinedp4j.proto.dpAggregates -import com.google.privacy.differentialprivacy.pipelinedp4j.proto.perFeature sealed interface Query { /** Executes the query (in production mode). */ @@ -142,21 +142,6 @@ protected constructor( valueAndVectorAggs.map { it.getFeatureId() } } return aggResults - .zip(featureIdPerRun) - .map { (table, featureId) -> - table.mapValues("TagWithFeatureId", encoderFactory.protos(DpAggregates::class)) { _, agg -> - if (featureId == null) { - agg - } else { - val perFeature = constructPerFeature(agg, featureId) - dpAggregates { - count = agg.count - privacyIdCount = agg.privacyIdCount - this.perFeature += perFeature - } - } - } - } .reduce { acc: FrameworkTable, table: FrameworkTable -> @@ -494,43 +479,59 @@ protected constructor( valueAggregations: ValueAggregations<*>?, vectorAggregations: VectorAggregations<*>?, ): AggregationParams { - val valueContributionBounds = valueAggregations?.contributionBounds - val vectorContributionBounds = vectorAggregations?.vectorContributionBounds + val nonFeatureMetrics = + aggregationSpecs + .filter { it is Count || it is PrivacyIdCount } + .map { it.toNonFeatureMetricDefinition() } + val features = + buildList { + if (valueAggregations != null) { + val valueContributionBounds = valueAggregations.contributionBounds + add( + ScalarFeatureSpec( + featureId = valueAggregations.getFeatureId(), + metrics = + valueAggregations.valueAggregationSpecs + .map { it.toMetricDefinition() } + .toImmutableList(), + minValue = valueContributionBounds.valueBounds?.minValue, + maxValue = valueContributionBounds.valueBounds?.maxValue, + minTotalValue = valueContributionBounds.totalValueBounds?.minValue, + maxTotalValue = valueContributionBounds.totalValueBounds?.maxValue, + ) + ) + } + if (vectorAggregations != null) { + val vectorContributionBounds = vectorAggregations.vectorContributionBounds + add( + VectorFeatureSpec( + featureId = vectorAggregations.getFeatureId(), + metrics = + vectorAggregations.vectorAggregationSpecs + .map { it.toMetricDefinition() } + .toImmutableList(), + vectorSize = vectorAggregations.vectorSize, + normKind = vectorContributionBounds.maxVectorTotalNorm.normKind.toInternalNormKind(), + vectorMaxTotalNorm = vectorContributionBounds.maxVectorTotalNorm.value, + ) + ) + } + } + return AggregationParams( - metrics = ImmutableList.copyOf(aggregationSpecs.metrics()), + nonFeatureMetrics = nonFeatureMetrics.toImmutableList(), + features = features.toImmutableList(), noiseKind = checkNotNull(noiseKind) { "noiseKind cannot be null if there are aggregations." } .toInternalNoiseKind(), maxPartitionsContributed = contributionBoundingLevel.getMaxPartitionsContributed(), maxContributionsPerPartition = contributionBoundingLevel.getMaxContributionsPerPartition(), - minValue = valueContributionBounds?.valueBounds?.minValue, - maxValue = valueContributionBounds?.valueBounds?.maxValue, - minTotalValue = valueContributionBounds?.totalValueBounds?.minValue, - maxTotalValue = valueContributionBounds?.totalValueBounds?.maxValue, - vectorNormKind = vectorContributionBounds?.maxVectorTotalNorm?.normKind?.toInternalNormKind(), - vectorMaxTotalNorm = vectorContributionBounds?.maxVectorTotalNorm?.value, - vectorSize = vectorAggregations?.vectorSize, partitionSelectionBudget = groupsType.getBudget()?.toInternalBudgetPerOpSpec(), preThreshold = groupsType.getPreThreshold(), contributionBoundingLevel = contributionBoundingLevel.toInternalContributionBoundingLevel(), partitionsBalance = groupByAdditionalParameters.groupsBalance.toPartitionsBalance(), ) } - - companion object { - private fun constructPerFeature(dpAggregates: DpAggregates, featureId: String): PerFeature { - return perFeature { - this.featureId = featureId - sum = dpAggregates.sum - mean = dpAggregates.mean - variance = dpAggregates.variance - if (dpAggregates.quantilesList.isNotEmpty()) { - quantiles += dpAggregates.quantilesList - } - if (dpAggregates.vectorSumList.isNotEmpty()) { - vectorSum += dpAggregates.vectorSumList - } - } - } - } } + +private fun Iterable.toImmutableList(): ImmutableList = ImmutableList.copyOf(this) diff --git a/pipelinedp4j/main/com/google/privacy/differentialprivacy/pipelinedp4j/api/QueryPerGroupResult.kt b/pipelinedp4j/main/com/google/privacy/differentialprivacy/pipelinedp4j/api/QueryPerGroupResult.kt index cc30f7b3..68fe3cde 100644 --- a/pipelinedp4j/main/com/google/privacy/differentialprivacy/pipelinedp4j/api/QueryPerGroupResult.kt +++ b/pipelinedp4j/main/com/google/privacy/differentialprivacy/pipelinedp4j/api/QueryPerGroupResult.kt @@ -115,40 +115,23 @@ internal constructor( MetricType.PRIVACY_ID_COUNT -> put(outputColumnName, dpAggregates.privacyIdCount) MetricType.COUNT -> put(outputColumnName, dpAggregates.count) MetricType.SUM -> { - if (dpAggregates.perFeatureList.isNotEmpty()) { - val featureId = columnNameToFeatureIdMap[outputColumnName]!! - put(outputColumnName, featuresMap[featureId]!!.sum) - } else { - put(outputColumnName, dpAggregates.sum) - } + val featureId = columnNameToFeatureIdMap[outputColumnName]!! + put(outputColumnName, featuresMap[featureId]!!.sum) } MetricType.VECTOR_SUM -> {} // not processed in this function. MetricType.MEAN -> { - if (dpAggregates.perFeatureList.isNotEmpty()) { - val featureId = columnNameToFeatureIdMap[outputColumnName]!! - put(outputColumnName, featuresMap[featureId]!!.mean) - } else { - put(outputColumnName, dpAggregates.mean) - } + val featureId = columnNameToFeatureIdMap[outputColumnName]!! + put(outputColumnName, featuresMap[featureId]!!.mean) } MetricType.VARIANCE -> { - if (dpAggregates.perFeatureList.isNotEmpty()) { - val featureId = columnNameToFeatureIdMap[outputColumnName]!! - put(outputColumnName, featuresMap[featureId]!!.variance) - } else { - put(outputColumnName, dpAggregates.variance) - } + val featureId = columnNameToFeatureIdMap[outputColumnName]!! + put(outputColumnName, featuresMap[featureId]!!.variance) } is MetricType.QUANTILES -> { // TODO: consider creating a data class or resuing copy of // DpAggregates proto and not allowing outputColumnName. - val quantilesList = - if (dpAggregates.perFeatureList.isNotEmpty()) { - val featureId = columnNameToFeatureIdMap[outputColumnName]!! - featuresMap[featureId]!!.quantilesList - } else { - dpAggregates.quantilesList - } + val featureId = columnNameToFeatureIdMap[outputColumnName]!! + val quantilesList = featuresMap[featureId]!!.quantilesList for ((rank, value) in metricType.sortedRanks.zip(quantilesList)) { put(outputColumnName.withRank(rank), value) } @@ -171,12 +154,8 @@ internal constructor( MetricType.COUNT -> {} // not processed in this function. MetricType.SUM -> {} // not processed in this function. MetricType.VECTOR_SUM -> { - if (dpAggregates.perFeatureList.isNotEmpty()) { - val featureId = colNameToFeatureIdMap[outputColumnName]!! - put(outputColumnName, featuresMap[featureId]!!.vectorSumList) - } else { - put(outputColumnName, dpAggregates.vectorSumList) - } + val featureId = colNameToFeatureIdMap[outputColumnName]!! + put(outputColumnName, featuresMap[featureId]!!.vectorSumList) } MetricType.MEAN -> {} // not processed in this function. MetricType.VARIANCE -> {} // not processed in this function. diff --git a/pipelinedp4j/main/com/google/privacy/differentialprivacy/pipelinedp4j/core/Combiners.kt b/pipelinedp4j/main/com/google/privacy/differentialprivacy/pipelinedp4j/core/Combiners.kt index 790f1b95..df6f4e48 100644 --- a/pipelinedp4j/main/com/google/privacy/differentialprivacy/pipelinedp4j/core/Combiners.kt +++ b/pipelinedp4j/main/com/google/privacy/differentialprivacy/pipelinedp4j/core/Combiners.kt @@ -30,6 +30,7 @@ import com.google.privacy.differentialprivacy.pipelinedp4j.proto.CompoundAccumul import com.google.privacy.differentialprivacy.pipelinedp4j.proto.CountAccumulator import com.google.privacy.differentialprivacy.pipelinedp4j.proto.DpAggregates import com.google.privacy.differentialprivacy.pipelinedp4j.proto.MeanAccumulator +import com.google.privacy.differentialprivacy.pipelinedp4j.proto.PerFeature import com.google.privacy.differentialprivacy.pipelinedp4j.proto.PrivacyIdContributions import com.google.privacy.differentialprivacy.pipelinedp4j.proto.PrivacyIdCountAccumulator import com.google.privacy.differentialprivacy.pipelinedp4j.proto.QuantilesAccumulator @@ -37,9 +38,11 @@ import com.google.privacy.differentialprivacy.pipelinedp4j.proto.SumAccumulator import com.google.privacy.differentialprivacy.pipelinedp4j.proto.VarianceAccumulator import com.google.privacy.differentialprivacy.pipelinedp4j.proto.VectorSumAccumulator import com.google.privacy.differentialprivacy.pipelinedp4j.proto.compoundAccumulator +import com.google.privacy.differentialprivacy.pipelinedp4j.proto.copy import com.google.privacy.differentialprivacy.pipelinedp4j.proto.countAccumulator import com.google.privacy.differentialprivacy.pipelinedp4j.proto.dpAggregates import com.google.privacy.differentialprivacy.pipelinedp4j.proto.meanAccumulator +import com.google.privacy.differentialprivacy.pipelinedp4j.proto.perFeature import com.google.privacy.differentialprivacy.pipelinedp4j.proto.privacyIdCountAccumulator import com.google.privacy.differentialprivacy.pipelinedp4j.proto.quantilesAccumulator import com.google.privacy.differentialprivacy.pipelinedp4j.proto.sumAccumulator @@ -390,7 +393,17 @@ class SumCombiner( private val budget: AllocatedBudget, private val noiseFactory: (NoiseKind) -> Noise, private val executionMode: ExecutionMode, + private val featureSpec: ScalarFeatureSpec, ) : Combiner, Serializable { + private val featureMinTotalValue = + checkNotNull(featureSpec.minTotalValue) { + "minTotalValue should be set when requesting SUM metric for feature ${featureSpec.featureId}" + } + private val featureMaxTotalValue = + checkNotNull(featureSpec.maxTotalValue) { + "maxTotalValue should be set when requesting SUM metric for feature ${featureSpec.featureId}" + } + override val requiresPerPartitionBoundedInput = false /** @@ -403,22 +416,26 @@ class SumCombiner( * @return a new `SumAccumulator` initialized with the sum of the contributions, potentially * bounded based on the privacy level. */ - override fun createAccumulator(contributions: PrivacyIdContributions): SumAccumulator = - sumAccumulator { + override fun createAccumulator(contributions: PrivacyIdContributions): SumAccumulator { + val featureContributions = + contributions.featuresList.find { it.featureId == featureSpec.featureId } + return sumAccumulator { + featureId = featureSpec.featureId sum = - if (contributions.singleValueContributionsList.isEmpty()) { + if (featureContributions == null) { 0.0 } else { - contributions.singleValueContributionsList + featureContributions.singleValueContributionsList .sum() .coerceInIfContributionBoundingEnabled( - aggregationParams.minTotalValue!!, - aggregationParams.maxTotalValue!!, + featureMinTotalValue, + featureMaxTotalValue, aggregationParams, executionMode, ) } } + } /** * Merges two [SumAccumulator] instances by adding their sums. @@ -430,7 +447,10 @@ class SumCombiner( override fun mergeAccumulators( accumulator1: SumAccumulator, accumulator2: SumAccumulator, - ): SumAccumulator = sumAccumulator { sum = accumulator1.sum + accumulator2.sum } + ): SumAccumulator = sumAccumulator { + featureId = accumulator1.featureId + sum = accumulator1.sum + accumulator2.sum + } /** * Computes a noisy sum from the given [SumAccumulator]. @@ -440,8 +460,7 @@ class SumCombiner( */ override fun computeMetrics(accumulator: SumAccumulator): Double { val noise = noiseFactory(aggregationParams.noiseKind) - val lInfSensitivity = - max(abs(aggregationParams.minTotalValue!!), abs(aggregationParams.maxTotalValue!!)) + val lInfSensitivity = max(abs(featureMinTotalValue), abs(featureMaxTotalValue)) return noise.addNoise( accumulator.sum, @@ -468,7 +487,12 @@ class VectorSumCombiner( private val budget: AllocatedBudget, private val noiseFactory: (NoiseKind) -> Noise, private val executionMode: ExecutionMode, + private val featureSpec: VectorFeatureSpec, ) : Combiner>, Serializable { + private val normKind = featureSpec.normKind + private val vectorMaxTotalNorm = featureSpec.vectorMaxTotalNorm + private val vectorSize = featureSpec.vectorSize + override val requiresPerPartitionBoundedInput = false /** @@ -482,16 +506,17 @@ class VectorSumCombiner( * bounded based on the privacy level. */ override fun createAccumulator(contributions: PrivacyIdContributions) = vectorSumAccumulator { + val featureContributions = + contributions.featuresList.find { it.featureId == featureSpec.featureId } + featureId = featureSpec.featureId sumsPerDimension += - contributions.multiValueContributionsList - .map { contribution -> ArrayRealVector(contribution.valuesList.toDoubleArray()) } - .reduceOrNull { acc, vector -> acc.add(vector) } - ?.clipIfContributionBoundingEnabled( - aggregationParams.vectorMaxTotalNorm!!, - aggregationParams.vectorNormKind!!, - ) + featureContributions + ?.multiValueContributionsList + ?.map { contribution -> ArrayRealVector(contribution.valuesList.toDoubleArray()) } + ?.reduceOrNull { acc, vector -> acc.add(vector) } + ?.clipIfContributionBoundingEnabled(vectorMaxTotalNorm, normKind) ?.toArray() - ?.asList() ?: List(aggregationParams.vectorSize!!) { 0.0 } + ?.asList() ?: List(vectorSize) { 0.0 } } /** @@ -505,6 +530,7 @@ class VectorSumCombiner( accumulator1: VectorSumAccumulator, accumulator2: VectorSumAccumulator, ) = vectorSumAccumulator { + featureId = accumulator1.featureId sumsPerDimension += accumulator1.sumsPerDimensionList.zip(accumulator2.sumsPerDimensionList).map { (e1, e2) -> e1 + e2 @@ -525,9 +551,9 @@ class VectorSumCombiner( is LaplaceNoise -> { val l1Sensitivity = calculateL1Sensistivity( - aggregationParams.vectorNormKind!!, - aggregationParams.vectorMaxTotalNorm!!, - aggregationParams.vectorSize!!, + normKind, + vectorMaxTotalNorm, + vectorSize, aggregationParams.maxPartitionsContributed!!, ) vector.map { noise.addNoise(it, l1Sensitivity, budget.epsilon(), budget.delta()) } @@ -535,9 +561,9 @@ class VectorSumCombiner( is GaussianNoise -> { val l2Sensitivity = calculateL2Sensistivity( - aggregationParams.vectorNormKind!!, - aggregationParams.vectorMaxTotalNorm!!, - aggregationParams.vectorSize!!, + normKind, + vectorMaxTotalNorm, + vectorSize, aggregationParams.maxPartitionsContributed!!, ) vector.map { noise.addNoise(it, l2Sensitivity, budget.epsilon(), budget.delta()) } @@ -631,10 +657,19 @@ class MeanCombiner( private val sumBudget: AllocatedBudget, private val noiseFactory: (NoiseKind) -> Noise, private val executionMode: ExecutionMode, + private val featureSpec: ScalarFeatureSpec, ) : Combiner, Serializable { - private val midValue = (aggregationParams.minValue!! + aggregationParams.maxValue!!) / 2 - private val returnCount = aggregationParams.metrics.any { it.type == COUNT } - private val returnSum = aggregationParams.metrics.any { it.type == SUM } + private val featureMinValue = + checkNotNull(featureSpec.minValue) { + "minValue should be set when requesting mean metric for feature ${featureSpec.featureId}" + } + private val featureMaxValue = + checkNotNull(featureSpec.maxValue) { + "maxValue should be set when requesting mean metric for feature ${featureSpec.featureId}" + } + private val midValue = (featureMinValue + featureMaxValue) / 2 + private val returnCount = aggregationParams.nonFeatureMetrics.any { it.type == COUNT } + private val returnSum = featureSpec.metrics.any { it.type == SUM } override val requiresPerPartitionBoundedInput = true @@ -652,13 +687,17 @@ class MeanCombiner( // half the sensitivity it would otherwise take for better accuracy (as compared // to doing noisy sum / noisy count). meanAccumulator { - count = contributions.singleValueContributionsList.size.toLong() + val featureContributions = + contributions.featuresList.find { it.featureId == featureSpec.featureId } + val values = featureContributions?.singleValueContributionsList ?: emptyList() + featureId = featureSpec.featureId + count = values.size.toLong() normalizedSum = - contributions.singleValueContributionsList + values .map { it.coerceInIfContributionBoundingEnabled( - aggregationParams.minValue!!, - aggregationParams.maxValue!!, + featureMinValue, + featureMaxValue, aggregationParams, executionMode, ) - midValue @@ -677,6 +716,7 @@ class MeanCombiner( accumulator1: MeanAccumulator, accumulator2: MeanAccumulator, ): MeanAccumulator = meanAccumulator { + featureId = accumulator1.featureId count = accumulator1.count + accumulator2.count normalizedSum = accumulator1.normalizedSum + accumulator2.normalizedSum } @@ -692,6 +732,7 @@ class MeanCombiner( val dpNormalizedSum = getNoisedNormalizedSum( accumulator.normalizedSum, + featureMaxValue, midValue, aggregationParams, sumBudget, @@ -734,7 +775,17 @@ class QuantilesCombiner( private val budget: AllocatedBudget, private val noiseFactory: (NoiseKind) -> Noise, private val executionMode: ExecutionMode, + private val featureSpec: ScalarFeatureSpec, ) : Combiner>, Serializable { + private val featureMinValue = + checkNotNull(featureSpec.minValue) { + "minValue should be set when requesting quantiles metric for feature ${featureSpec.featureId}" + } + private val featureMaxValue = + checkNotNull(featureSpec.maxValue) { + "maxValue should be set when requesting quantiles metric for feature ${featureSpec.featureId}" + } + override val requiresPerPartitionBoundedInput = true /** @@ -749,8 +800,12 @@ class QuantilesCombiner( */ override fun createAccumulator(contributions: PrivacyIdContributions): QuantilesAccumulator = quantilesAccumulator { + val featureContributions = + contributions.featuresList.find { it.featureId == featureSpec.featureId } + val values = featureContributions?.singleValueContributionsList ?: emptyList() + featureId = featureSpec.featureId val boundedQuantiles = emptyBoundedQuantiles() - boundedQuantiles.addEntries(contributions.singleValueContributionsList) + boundedQuantiles.addEntries(values) serializedQuantilesSummary = ByteString.copyFrom(boundedQuantiles.serializableSummary) } @@ -765,6 +820,7 @@ class QuantilesCombiner( accumulator1: QuantilesAccumulator, accumulator2: QuantilesAccumulator, ): QuantilesAccumulator = quantilesAccumulator { + featureId = accumulator1.featureId val boundedQuantiles = emptyBoundedQuantiles() boundedQuantiles.mergeWith(accumulator1.serializedQuantilesSummary.toByteArray()) boundedQuantiles.mergeWith(accumulator2.serializedQuantilesSummary.toByteArray()) @@ -820,8 +876,8 @@ class QuantilesCombiner( ) // Min and max values aren't changed if there is no contribution bounding because the extreme // values aren't supported by the DP library. - .lower(aggregationParams.minValue!!) - .upper(aggregationParams.maxValue!!) + .lower(featureMinValue) + .upper(featureMaxValue) .build() } @@ -846,11 +902,20 @@ class VarianceCombiner( private val sumSquaresBudget: AllocatedBudget, private val noiseFactory: (NoiseKind) -> Noise, private val executionMode: ExecutionMode, + private val featureSpec: ScalarFeatureSpec, ) : Combiner, Serializable { - private val midValue = (aggregationParams.minValue!! + aggregationParams.maxValue!!) / 2 - private val returnCount = aggregationParams.metrics.any { it.type == COUNT } - private val returnSum = aggregationParams.metrics.any { it.type == SUM } - private val returnMean = aggregationParams.metrics.any { it.type == MEAN } + private val featureMinValue = + checkNotNull(featureSpec.minValue) { + "minValue should be set when requesting variance metrics for feature ${featureSpec.featureId}" + } + private val featureMaxValue = + checkNotNull(featureSpec.maxValue) { + "maxValue should be set when requesting variance metrics for feature ${featureSpec.featureId}" + } + private val midValue = (featureMinValue + featureMaxValue) / 2 + private val returnCount = aggregationParams.nonFeatureMetrics.any { it.type == COUNT } + private val returnSum = featureSpec.metrics.any { it.type == SUM } + private val returnMean = featureSpec.metrics.any { it.type == MEAN } override val requiresPerPartitionBoundedInput = true @@ -869,11 +934,15 @@ class VarianceCombiner( // half the sensitivity it would otherwise take for better accuracy (as compared // to doing noisy sum / noisy count). varianceAccumulator { + val featureContributions = + contributions.featuresList.find { it.featureId == featureSpec.featureId } + val values = featureContributions?.singleValueContributionsList ?: emptyList() + featureId = featureSpec.featureId val coercedValues = - contributions.singleValueContributionsList.map { + values.map { it.coerceInIfContributionBoundingEnabled( - aggregationParams.minValue!!, - aggregationParams.maxValue!!, + featureMinValue, + featureMaxValue, aggregationParams, executionMode, ) - midValue @@ -895,6 +964,7 @@ class VarianceCombiner( accumulator1: VarianceAccumulator, accumulator2: VarianceAccumulator, ): VarianceAccumulator = varianceAccumulator { + featureId = accumulator1.featureId count = accumulator1.count + accumulator2.count normalizedSum = accumulator1.normalizedSum + accumulator2.normalizedSum normalizedSumSquares = accumulator1.normalizedSumSquares + accumulator2.normalizedSumSquares @@ -913,6 +983,7 @@ class VarianceCombiner( val dpNormalizedSum = getNoisedNormalizedSum( accumulator.normalizedSum, + featureMaxValue, midValue, aggregationParams, sumBudget, @@ -921,6 +992,7 @@ class VarianceCombiner( val dpNormalizedSumSquares = getNoisedNormalizedSumOfSquares( accumulator.normalizedSumSquares, + featureMaxValue, midValue, aggregationParams, sumSquaresBudget, @@ -995,11 +1067,11 @@ class CompoundCombiner(val combiners: Iterable>) : is PostAggregationPartitionSelectionCombiner -> privacyIdCountAccumulator = combiner.createAccumulator(contributions) is CountCombiner -> countAccumulator = combiner.createAccumulator(contributions) - is SumCombiner -> sumAccumulator = combiner.createAccumulator(contributions) - is VectorSumCombiner -> vectorSumAccumulator = combiner.createAccumulator(contributions) - is MeanCombiner -> meanAccumulator = combiner.createAccumulator(contributions) - is QuantilesCombiner -> quantilesAccumulator = combiner.createAccumulator(contributions) - is VarianceCombiner -> varianceAccumulator = combiner.createAccumulator(contributions) + is SumCombiner -> sumAccumulators += combiner.createAccumulator(contributions) + is VectorSumCombiner -> vectorSumAccumulators += combiner.createAccumulator(contributions) + is MeanCombiner -> meanAccumulators += combiner.createAccumulator(contributions) + is QuantilesCombiner -> quantilesAccumulators += combiner.createAccumulator(contributions) + is VarianceCombiner -> varianceAccumulators += combiner.createAccumulator(contributions) is CompoundCombiner -> throwIfCompoundCombiner() } } @@ -1040,33 +1112,49 @@ class CompoundCombiner(val combiners: Iterable>) : is CountCombiner -> countAccumulator = combiner.mergeAccumulators(accumulator1.countAccumulator, accumulator2.countAccumulator) - is SumCombiner -> - sumAccumulator = - combiner.mergeAccumulators(accumulator1.sumAccumulator, accumulator2.sumAccumulator) - is VectorSumCombiner -> - vectorSumAccumulator = - combiner.mergeAccumulators( - accumulator1.vectorSumAccumulator, - accumulator2.vectorSumAccumulator, - ) - is MeanCombiner -> - meanAccumulator = - combiner.mergeAccumulators(accumulator1.meanAccumulator, accumulator2.meanAccumulator) - is VarianceCombiner -> - varianceAccumulator = - combiner.mergeAccumulators( - accumulator1.varianceAccumulator, - accumulator2.varianceAccumulator, - ) - is QuantilesCombiner -> - quantilesAccumulator = - combiner.mergeAccumulators( - accumulator1.quantilesAccumulator, - accumulator2.quantilesAccumulator, - ) + is SumCombiner -> {} + is VectorSumCombiner -> {} + is MeanCombiner -> {} + is VarianceCombiner -> {} + is QuantilesCombiner -> {} is CompoundCombiner -> throwIfCompoundCombiner() } } + combiners.filterIsInstance().firstOrNull()?.let { combiner -> + sumAccumulators += + (accumulator1.sumAccumulatorsList + accumulator2.sumAccumulatorsList) + .groupBy { it.featureId } + .values + .map { it.reduce { acc1, acc2 -> combiner.mergeAccumulators(acc1, acc2) } } + } + combiners.filterIsInstance().firstOrNull()?.let { combiner -> + vectorSumAccumulators += + (accumulator1.vectorSumAccumulatorsList + accumulator2.vectorSumAccumulatorsList) + .groupBy { it.featureId } + .values + .map { it.reduce { acc1, acc2 -> combiner.mergeAccumulators(acc1, acc2) } } + } + combiners.filterIsInstance().firstOrNull()?.let { combiner -> + meanAccumulators += + (accumulator1.meanAccumulatorsList + accumulator2.meanAccumulatorsList) + .groupBy { it.featureId } + .values + .map { it.reduce { acc1, acc2 -> combiner.mergeAccumulators(acc1, acc2) } } + } + combiners.filterIsInstance().firstOrNull()?.let { combiner -> + varianceAccumulators += + (accumulator1.varianceAccumulatorsList + accumulator2.varianceAccumulatorsList) + .groupBy { it.featureId } + .values + .map { it.reduce { acc1, acc2 -> combiner.mergeAccumulators(acc1, acc2) } } + } + combiners.filterIsInstance().firstOrNull()?.let { combiner -> + quantilesAccumulators += + (accumulator1.quantilesAccumulatorsList + accumulator2.quantilesAccumulatorsList) + .groupBy { it.featureId } + .values + .map { it.reduce { acc1, acc2 -> combiner.mergeAccumulators(acc1, acc2) } } + } } /** @@ -1075,50 +1163,66 @@ class CompoundCombiner(val combiners: Iterable>) : * @param accumulator the [CompoundAccumulator] containing the aggregated data. * @return a [DpAggregates] object containing the computed results for all metrics. */ - override fun computeMetrics(accumulator: CompoundAccumulator) = dpAggregates { + override fun computeMetrics(accumulator: CompoundAccumulator): DpAggregates { + var resCount: Double? = null + var resPidCount: Double? = null + val featureMetrics = mutableMapOf() + + fun getFeature(fid: String) = featureMetrics.getOrPut(fid) { perFeature { featureId = fid } } + for (combiner in combiners) { when (combiner) { is PrivacyIdCountCombiner -> - privacyIdCount = combiner.computeMetrics(accumulator.privacyIdCountAccumulator) + resPidCount = combiner.computeMetrics(accumulator.privacyIdCountAccumulator) is ExactPrivacyIdCountCombiner -> {} // no anonymized output is PostAggregationPartitionSelectionCombiner -> { - val noisedPrivacyIdCount = combiner.computeMetrics(accumulator.privacyIdCountAccumulator) - if (noisedPrivacyIdCount != null) { - privacyIdCount = noisedPrivacyIdCount - } + combiner.computeMetrics(accumulator.privacyIdCountAccumulator)?.let { resPidCount = it } } - is CountCombiner -> count = combiner.computeMetrics(accumulator.countAccumulator) - is SumCombiner -> sum = combiner.computeMetrics(accumulator.sumAccumulator) + is CountCombiner -> resCount = combiner.computeMetrics(accumulator.countAccumulator) + is SumCombiner -> + for (acc in accumulator.sumAccumulatorsList) { + featureMetrics[acc.featureId] = + getFeature(acc.featureId).copy { sum = combiner.computeMetrics(acc) } + } is VectorSumCombiner -> - vectorSum += combiner.computeMetrics(accumulator.vectorSumAccumulator) - is MeanCombiner -> { - val meanResult = combiner.computeMetrics(accumulator.meanAccumulator) - mean = meanResult.mean - if (meanResult.sum != null) { - sum = meanResult.sum + for (acc in accumulator.vectorSumAccumulatorsList) { + featureMetrics[acc.featureId] = + getFeature(acc.featureId).copy { vectorSum += combiner.computeMetrics(acc) } } - if (meanResult.count != null) { - count = meanResult.count + is MeanCombiner -> + for (acc in accumulator.meanAccumulatorsList) { + val meanResult = combiner.computeMetrics(acc) + featureMetrics[acc.featureId] = + getFeature(acc.featureId).copy { + mean = meanResult.mean + meanResult.sum?.let { sum = it } + } + meanResult.count?.let { resCount = it } } - } is QuantilesCombiner -> - quantiles += combiner.computeMetrics(accumulator.quantilesAccumulator) - is VarianceCombiner -> { - val varianceResult = combiner.computeMetrics(accumulator.varianceAccumulator) - variance = varianceResult.variance - if (varianceResult.count != null) { - count = varianceResult.count + for (acc in accumulator.quantilesAccumulatorsList) { + featureMetrics[acc.featureId] = + getFeature(acc.featureId).copy { quantiles += combiner.computeMetrics(acc) } } - if (varianceResult.sum != null) { - sum = varianceResult.sum - } - if (varianceResult.mean != null) { - mean = varianceResult.mean + is VarianceCombiner -> + for (acc in accumulator.varianceAccumulatorsList) { + val varianceResult = combiner.computeMetrics(acc) + featureMetrics[acc.featureId] = + getFeature(acc.featureId).copy { + variance = varianceResult.variance + varianceResult.sum?.let { sum = it } + varianceResult.mean?.let { mean = it } + } + varianceResult.count?.let { resCount = it } } - } is CompoundCombiner -> throwIfCompoundCombiner() } } + return dpAggregates { + resCount?.let { this.count = it } + resPidCount?.let { this.privacyIdCount = it } + perFeature += featureMetrics.values + } } /** @@ -1173,6 +1277,7 @@ private fun getNoisedCount( private fun getNoisedNormalizedSum( normalizedSum: Double, + featureMaxValue: Double, midValue: Double, aggregationParams: AggregationParams, sumBudget: AllocatedBudget, @@ -1182,7 +1287,7 @@ private fun getNoisedNormalizedSum( // All values were normalized to the symmetric range [minValue-midValue, maxValue-midValue]. // So the linf sensitivity of 1 record is (maxValue-midValue). val lInfSensitivity = - (aggregationParams.maxValue!! - midValue) * aggregationParams.maxContributionsPerPartition!! + (featureMaxValue - midValue) * aggregationParams.maxContributionsPerPartition!! return noise.addNoise( normalizedSum, aggregationParams.maxPartitionsContributed!!, @@ -1194,6 +1299,7 @@ private fun getNoisedNormalizedSum( private fun getNoisedNormalizedSumOfSquares( normalizedSumOfSquares: Double, + featureMaxValue: Double, midValue: Double, aggregationParams: AggregationParams, sumOfSquaresBudget: AllocatedBudget, @@ -1204,7 +1310,7 @@ private fun getNoisedNormalizedSumOfSquares( // were then squared and summed up. // So the linf sensitivity of 1 record is (maxValue-midValue)^2 distributed across allowed // partition contributions. - val distance = aggregationParams.maxValue!! - midValue + val distance = featureMaxValue - midValue val lInfSensitivity = distance * distance * aggregationParams.maxContributionsPerPartition!! return noise.addNoise( normalizedSumOfSquares, @@ -1216,10 +1322,10 @@ private fun getNoisedNormalizedSumOfSquares( } private fun PrivacyIdContributions.size(): Int { - if (singleValueContributionsCount > 0 && multiValueContributionsCount > 0) { - throw IllegalArgumentException( - "PrivacyIdContributions cannot have both single and multi value contributions." - ) + if (featuresList.isEmpty()) { + return 0 } - return max(singleValueContributionsCount, multiValueContributionsCount) + return featuresList + .map { feature -> feature.singleValueContributionsCount + feature.multiValueContributionsCount } + .maxOrNull() ?: 0 } diff --git a/pipelinedp4j/main/com/google/privacy/differentialprivacy/pipelinedp4j/core/ContributionSampler.kt b/pipelinedp4j/main/com/google/privacy/differentialprivacy/pipelinedp4j/core/ContributionSampler.kt index 0d5b07b9..228da530 100644 --- a/pipelinedp4j/main/com/google/privacy/differentialprivacy/pipelinedp4j/core/ContributionSampler.kt +++ b/pipelinedp4j/main/com/google/privacy/differentialprivacy/pipelinedp4j/core/ContributionSampler.kt @@ -17,8 +17,6 @@ package com.google.privacy.differentialprivacy.pipelinedp4j.core import com.google.privacy.differentialprivacy.pipelinedp4j.proto.PrivacyIdContributions -import com.google.privacy.differentialprivacy.pipelinedp4j.proto.PrivacyIdContributionsKt.multiValueContribution -import com.google.privacy.differentialprivacy.pipelinedp4j.proto.privacyIdContributions /** Bounds contributions to the entire non-aggregated data collection. */ sealed interface ContributionSampler { @@ -34,45 +32,6 @@ sealed interface ContributionSampler { ): FrameworkTable } -/** - * Samples contributions to [maxPartitionsContributed] partitions among the given [contributions] - * assuming that they all belong to the same [PrivacyId]. - */ -internal fun samplePartitions( - contributions: Iterable>, - maxPartitionsContributed: Int, -): Collection> { - val allPartitions = contributions.map { it.partitionKey() }.toSet() - val keptPartitions = sampleNElements(allPartitions, maxPartitionsContributed).toSet() - return contributions.filter { it.partitionKey() in keptPartitions } -} - -/** - * Samples [maxContributionsPerPartition] contributions among the given [partitionContributions] - * assuming that they all belong to the same [PrivacyId] and [PartitionKey]. Combines the result - * into a [PrivacyIdContributions] and returns it. - */ -internal fun sampleContributionsPerPartition( - partitionContributions: Iterable>, - maxContributionsPerPartition: Int, -): PrivacyIdContributions { - val sampledContributions = - sampleNElements(partitionContributions.toList(), maxContributionsPerPartition) - return privacyIdContributions { - for (contribution in sampledContributions) { - // TODO: Update to add support for multiple features. - // We expect that contribution contains only one feature with featureId="", - // produced by DataExtractors. - val perFeatureValues = contribution.perFeatureValues().single() - if (perFeatureValues.values.size == 1) { - singleValueContributions += perFeatureValues.values - } else { - multiValueContributions += multiValueContribution { values += perFeatureValues.values } - } - } - } -} - private fun sampleNElements(elements: Collection, N: Int): Collection { if (elements.size <= N) return elements return elements.shuffled().take(N) diff --git a/pipelinedp4j/main/com/google/privacy/differentialprivacy/pipelinedp4j/core/DpEngine.kt b/pipelinedp4j/main/com/google/privacy/differentialprivacy/pipelinedp4j/core/DpEngine.kt index 086a391d..5199ebae 100644 --- a/pipelinedp4j/main/com/google/privacy/differentialprivacy/pipelinedp4j/core/DpEngine.kt +++ b/pipelinedp4j/main/com/google/privacy/differentialprivacy/pipelinedp4j/core/DpEngine.kt @@ -279,10 +279,14 @@ internal constructor( noiseFactory: (NoiseKind) -> Noise, executionMode: ExecutionMode, ): CompoundCombiner { - val meanInMetrics = params.metrics.any { it.type == MEAN } - val varianceInMetrics = params.metrics.any { it.type == VARIANCE } + require(params.features.size <= 1) { "Multi-feature aggregations are not supported yet" } + val featureSpec = params.features.singleOrNull() + val allMetrics = + (featureSpec?.metrics ?: emptyList()) + params.nonFeatureMetrics + val meanInMetrics = allMetrics.any { it.type == MEAN } + val varianceInMetrics = allMetrics.any { it.type == VARIANCE } val metricCombiners = - params.metrics + allMetrics .mapNotNull { metric -> when (metric.type) { PRIVACY_ID_COUNT -> { @@ -319,7 +323,13 @@ internal constructor( } SUM -> { if (!meanInMetrics && !varianceInMetrics) { - SumCombiner(params, getBudgetForMetric(metric, params), noiseFactory, executionMode) + SumCombiner( + params, + getBudgetForMetric(metric, params), + noiseFactory, + executionMode, + featureSpec as ScalarFeatureSpec, + ) } else { null } @@ -330,12 +340,20 @@ internal constructor( getBudgetForMetric(metric, params), noiseFactory, executionMode, + featureSpec as VectorFeatureSpec, ) } MEAN -> { if (!varianceInMetrics) { val (countBudget, sumBudget) = calculateCountSumBudgetsForMean(params) - MeanCombiner(params, countBudget, sumBudget, noiseFactory, executionMode) + MeanCombiner( + params, + countBudget, + sumBudget, + noiseFactory, + executionMode, + featureSpec as ScalarFeatureSpec, + ) } else { null } @@ -349,6 +367,7 @@ internal constructor( sumSquaresBudget, noiseFactory, executionMode, + featureSpec as ScalarFeatureSpec, ) } @@ -359,12 +378,13 @@ internal constructor( getBudgetForMetric(metric, params), noiseFactory, executionMode, + featureSpec as ScalarFeatureSpec, ) } } } .toMutableList() - if (!usePublicPartitions && !params.metrics.any { it.type == PRIVACY_ID_COUNT }) { + if (!usePublicPartitions && !allMetrics.any { it.type == PRIVACY_ID_COUNT }) { // For private partitions, we need to compute the privacy ID count, even if PRIVACY_ID_COUNT // is not requested in metrics. metricCombiners.add(ExactPrivacyIdCountCombiner()) @@ -436,10 +456,13 @@ internal constructor( private fun calculateCountSumBudgetsForMean( params: AggregationParams ): Pair { - fun getMetricDefinition(metricType: MetricType) = params.metrics.find { it.type == metricType } + fun getMetricDefinitionFromFeature(metricType: MetricType) = + params.features.single().metrics.find { it.type == metricType } + fun getMetricDefinitionFromNonFeature(metricType: MetricType) = + params.nonFeatureMetrics.find { it.type == metricType } // meanDefinition is not null, because this function is called only when MEAN is in metrics. - val meanDefinition = getMetricDefinition(MEAN)!! + val meanDefinition = getMetricDefinitionFromFeature(MEAN)!! // Budget spec for COUNT. val countBudgetSpec: BudgetPerOpSpec = @@ -448,7 +471,8 @@ internal constructor( meanDefinition.budgetSpec!!.times(0.5) } else { // Or COUNT spec or the default budget spec. - getMetricDefinition(COUNT)?.budgetSpec ?: RelativeBudgetPerOpSpec(weight = 1.0) + getMetricDefinitionFromNonFeature(COUNT)?.budgetSpec + ?: RelativeBudgetPerOpSpec(weight = 1.0) } // Budget spec for SUM. @@ -458,7 +482,7 @@ internal constructor( meanDefinition.budgetSpec!!.times(0.5) } else { // Or SUM spec or the default budget spec. - getMetricDefinition(SUM)?.budgetSpec ?: RelativeBudgetPerOpSpec(weight = 1.0) + getMetricDefinitionFromFeature(SUM)?.budgetSpec ?: RelativeBudgetPerOpSpec(weight = 1.0) } return budgetAccountant.requestBudget( @@ -473,7 +497,7 @@ internal constructor( params: AggregationParams ): Triple { // Variance is not null because this function is called only when it is in metrics. - val varianceDefinition = params.metrics.find { it.type == VARIANCE }!! + val varianceDefinition = params.features.single().metrics.find { it.type == VARIANCE }!! // Budget is split equally between COUNT, SUM and SUM_SQUARES. val budgetSplit = 1.0 / 3.0 // If varianceDefinition.budgetSpec is null, the default budget spec is used. @@ -512,5 +536,5 @@ private fun usePostAggregationPartitionSelection( executionMode: ExecutionMode, ): Boolean = !usePublicPartitions && - params.metrics.any { it.type == PRIVACY_ID_COUNT } && + params.nonFeatureMetrics.any { it.type == PRIVACY_ID_COUNT } && executionMode.partitionSelectionIsNonDeterministic diff --git a/pipelinedp4j/main/com/google/privacy/differentialprivacy/pipelinedp4j/core/DpFunctionsParams.kt b/pipelinedp4j/main/com/google/privacy/differentialprivacy/pipelinedp4j/core/DpFunctionsParams.kt index dbb20caa..7ba21d42 100644 --- a/pipelinedp4j/main/com/google/privacy/differentialprivacy/pipelinedp4j/core/DpFunctionsParams.kt +++ b/pipelinedp4j/main/com/google/privacy/differentialprivacy/pipelinedp4j/core/DpFunctionsParams.kt @@ -117,7 +117,8 @@ private fun partitionsContributedBoundingShouldBeApplied( @Immutable data class AggregationParams( /** The metrics being anonymized. */ - val metrics: ImmutableList, + val nonFeatureMetrics: ImmutableList, + val features: ImmutableList = ImmutableList.of(), val noiseKind: NoiseKind, /** * The maximum number of partitions that can be contributed by a privacy unit. Used by all @@ -134,37 +135,6 @@ data class AggregationParams( * Note this is mutually exclusive with maxContributionsPerPartition. */ val maxContributions: Int? = null, - /** - * The minimum bound on the individual value that can be contributed by a user to a partition. - * Used for MEAN and QUANTILES. - */ - val minValue: Double? = null, - /** - * The maximum bound on the individual value that can be contributed by a user to a partition. - * Used for MEAN and QUANTILES. - */ - val maxValue: Double? = null, - /** - * The minimum bound on the sum of the values that can be contributed by a user to a partition. - * Used for SUM. - */ - val minTotalValue: Double? = null, - /** - * The maximum bound on the sum of the values that can be contributed by a user to a partition. - * Used for SUM. - */ - val maxTotalValue: Double? = null, - /** The type of norm. Used for VECTOR_SUM. */ - val vectorNormKind: NormKind? = null, - /** - * The maximum norm of the sum of the vectors that can be contributed by a user to a partition. - * Used for VECTOR_SUM. - */ - val vectorMaxTotalNorm: Double? = null, - /** - * The size of the vectors that can be contributed by a user to a partition. Used for VECTOR_SUM. - */ - val vectorSize: Int? = null, /** * The amount of budget used for partition selection. * @@ -216,26 +186,44 @@ fun validateAggregationParams( } } - // Metrics. - require(!params.metrics.isEmpty()) { "metrics must not be empty." } - require(params.metrics.map { it.type }.distinct().size == params.metrics.size) { - "metrics must not contain duplicate metric types. Provided ${params.metrics.map { it.type }}." + // Metrics & features validation. + require(params.nonFeatureMetrics.isNotEmpty() || params.features.isNotEmpty()) { + "At least one of nonFeatureMetrics or features must be specified." + } + require( + params.nonFeatureMetrics.all { + it.type == MetricType.COUNT || it.type == MetricType.PRIVACY_ID_COUNT + } + ) { + "Only COUNT and PRIVACY_ID_COUNT are allowed in AggregationParams.nonFeatureMetrics. Other metrics should be provided via AggregationParams.features." + } + val featureMetrics = params.features.flatMap { it.metrics } + require( + featureMetrics.none { it.type == MetricType.COUNT || it.type == MetricType.PRIVACY_ID_COUNT } + ) { + "COUNT and PRIVACY_ID_COUNT are not allowed in features. They should be provided via AggregationParams.nonFeatureMetrics." + } + + require( + params.nonFeatureMetrics.map { it.type }.distinct().size == params.nonFeatureMetrics.size + ) { + "nonFeatureMetrics must not contain duplicate metric types. Provided ${params.nonFeatureMetrics.map { it.type }}." } + for (feature in params.features) { + require(feature.metrics.map { it.type }.distinct().size == feature.metrics.size) { + "feature ${feature.featureId} must not contain duplicate metric types. Provided ${feature.metrics.map { it.type }}" + } + } + require(params.features.map { it.featureId }.distinct().size == params.features.size) { + "featureId must be unique. Provided ${params.features.map { it.featureId }}" + } + // Max contributions per partition. require(isGreaterThanZeroIfSet(params.maxContributionsPerPartition)) { "maxContributionsPerPartition must be positive. Provided value: " + "${params.maxContributionsPerPartition}." } - if (params.contributionBoundingLevel.withContributionsPerPartitionBounding) { - require( - params.maxContributionsPerPartition != null || - params.maxContributions != null || - (params.minTotalValue != null && params.maxTotalValue != null) || - params.vectorMaxTotalNorm != null - ) { - "maxContributionsPerPartition or maxContributions or (minTotalValue, maxTotalValue) or vectorMaxTotalNorm must be set because specified ${params.contributionBoundingLevel} contribution bounding level requires per partition bounding." - } - } + // Max contributions. require(isGreaterThanZeroIfSet(params.maxContributions)) { "maxContributions must be positive. Provided value: " + "${params.maxContributions}." @@ -251,52 +239,90 @@ fun validateAggregationParams( "Provided values: maxContributions=${params.maxContributions}, " + "maxContributionsPerPartition=${params.maxContributionsPerPartition}." } + + // Required parameters per each metric. + if (params.contributionBoundingLevel.withContributionsPerPartitionBounding) { + val perPartitionBoundsSet = params.maxContributionsPerPartition != null + val crossPartitionBoundsSet = params.maxContributions != null + val totalValueBoundsSet = + params.features.any { + (it is ScalarFeatureSpec && it.minTotalValue != null && it.maxTotalValue != null) || + it is VectorFeatureSpec + } + require(perPartitionBoundsSet || crossPartitionBoundsSet || totalValueBoundsSet) { + "maxContributionsPerPartition or maxContributions or (minTotalValue, maxTotalValue) or vectorMaxTotalNorm must be set because specified ${params.contributionBoundingLevel} contribution bounding level requires per partition bounding" + } + } + + if (metricIsRequested(COUNT::class, params.nonFeatureMetrics)) { + require(params.maxContributionsPerPartition != null || params.maxContributions != null) { + "maxContributionsPerPartition or maxContributions must be set for COUNT metric." + } + } + + for (feature in params.features) { + when (feature) { + is ScalarFeatureSpec -> validateScalarFeature(params, feature) + is VectorFeatureSpec -> validateVectorFeature(params, feature) + } + } + + // Partition selection + if (usePublicPartitions) { + require(params.partitionSelectionBudget == null) { + "partitionSelectionBudget can not be set for public partitions." + } + } + + // ValueExtractor: only COUNT and PRIVACY_ID_COUNT can be computed w/o a value extractor. + if (!hasValueExtractor) { + require(featureMetrics.isEmpty()) { + "Metrics ${featureMetrics.map { it.type }} require a value extractor." + } + } +} + +private fun validateScalarFeature(params: AggregationParams, feature: ScalarFeatureSpec) { // Min/Max bounds - require(sameNullability(params.minValue, params.maxValue)) { + require(sameNullability(feature.minValue, feature.maxValue)) { "minValue and maxValue must be simultaneously equal or not equal to null. Provided values: " + - "minValue=${params.minValue}, maxValue=${params.maxValue}." + "minValue=${feature.minValue}, maxValue=${feature.maxValue}." } var areMinMaxValuesSet = false - if (params.minValue != null && params.maxValue != null) { + if (feature.minValue != null && feature.maxValue != null) { areMinMaxValuesSet = true - require(params.minValue < params.maxValue) { + require(feature.minValue < feature.maxValue) { "minValue must be less than maxValue. Provided values: " + - "minValue=${params.minValue}, maxValue=${params.maxValue}." + "minValue=${feature.minValue}, maxValue=${feature.maxValue}." } } - require(sameNullability(params.minTotalValue, params.maxTotalValue)) { + require(sameNullability(feature.minTotalValue, feature.maxTotalValue)) { "minTotalValue and maxTotalValue must be simultaneously equal or not equal to null. " + - "Provided values: minTotalValue=${params.minTotalValue}, " + - "maxTotalValue=${params.maxTotalValue}." + "Provided values: minTotalValue=${feature.minTotalValue}, " + + "maxTotalValue=${feature.maxTotalValue}." } var areMinMaxTotalValuesSet = false - if (params.minTotalValue != null && params.maxTotalValue != null) { + if (feature.minTotalValue != null && feature.maxTotalValue != null) { areMinMaxTotalValuesSet = true - require(params.minTotalValue <= params.maxTotalValue) { + require(feature.minTotalValue <= feature.maxTotalValue) { "minTotalValue must be less or equal to maxTotalValue. Provided values: " + - "minTotalValue=${params.minTotalValue}, maxTotalValue=${params.maxTotalValue}." + "minTotalValue=${feature.minTotalValue}, maxTotalValue=${feature.maxTotalValue}." } } - // Required parameters per each metric. - if (metricIsRequested(COUNT::class, params)) { - require(params.maxContributionsPerPartition != null || params.maxContributions != null) { - "maxContributionsPerPartition or maxContributions must be set for COUNT metric." - } - } // When MEAN and SUM are set together, then contribution bounding with (minValue, maxValue) // is used. SUM and VARIANCE should not be set together. if ( - metricIsRequested(SUM::class, params) && - !metricIsRequested(MEAN::class, params) && - !metricIsRequested(VARIANCE::class, params) + metricIsRequested(SUM::class, feature.metrics) && + !metricIsRequested(MEAN::class, feature.metrics) && + !metricIsRequested(VARIANCE::class, feature.metrics) ) { require(areMinMaxTotalValuesSet) { "(minTotalValue, maxTotalValue) must be set for SUM metrics." } } - if (metricIsRequested(MEAN::class, params)) { + if (metricIsRequested(MEAN::class, feature.metrics)) { require(params.maxContributionsPerPartition != null || params.maxContributions != null) { "maxContributionsPerPartition or maxContributions must be set for MEAN metric." } @@ -306,25 +332,25 @@ fun validateAggregationParams( } } require( - params.metrics.find { it.type == COUNT }?.budgetSpec == null || - params.metrics.find { it.type == MEAN }?.budgetSpec == null + params.nonFeatureMetrics.find { it.type == COUNT }?.budgetSpec == null || + feature.metrics.find { it.type == MEAN }?.budgetSpec == null ) { "BudgetPerOpSpec can not be set for both COUNT and MEAN metrics." } require( - params.metrics.find { it.type == SUM }?.budgetSpec == null || - params.metrics.find { it.type == MEAN }?.budgetSpec === null + feature.metrics.find { it.type == SUM }?.budgetSpec == null || + feature.metrics.find { it.type == MEAN }?.budgetSpec === null ) { "BudgetPerOpSpec can not be set for both SUM and MEAN metrics." } require( - params.metrics.find { it.type == MEAN }?.budgetSpec == null || - params.metrics.find { it.type == VARIANCE }?.budgetSpec == null + feature.metrics.find { it.type == MEAN }?.budgetSpec == null || + feature.metrics.find { it.type == VARIANCE }?.budgetSpec == null ) { "BudgetPerOpSpec can not be set for both MEAN and VARIANCE metrics." } // Validation for VARIANCE metric. - if (metricIsRequested(VARIANCE::class, params)) { + if (metricIsRequested(VARIANCE::class, feature.metrics)) { require(params.maxContributionsPerPartition != null || params.maxContributions != null) { "maxContributionsPerPartition or maxContributions must be set for VARIANCE metric." } @@ -334,67 +360,40 @@ fun validateAggregationParams( } } require( - params.metrics.find { it.type == SUM }?.budgetSpec == null || - params.metrics.find { it.type == VARIANCE }?.budgetSpec == null + feature.metrics.find { it.type == SUM }?.budgetSpec == null || + feature.metrics.find { it.type == VARIANCE }?.budgetSpec == null ) { "BudgetPerOpSpec can not be set for both SUM and VARIANCE metrics." } require( - params.metrics.find { it.type == COUNT }?.budgetSpec == null || - params.metrics.find { it.type == VARIANCE }?.budgetSpec == null + params.nonFeatureMetrics.find { it.type == COUNT }?.budgetSpec == null || + feature.metrics.find { it.type == VARIANCE }?.budgetSpec == null ) { "BudgetPerOpSpec can not be set for both COUNT and VARIANCE metrics." } // Validation for QUANTILES metric. - if (metricIsRequested(QUANTILES::class, params)) { + if (metricIsRequested(QUANTILES::class, feature.metrics)) { require(params.maxContributionsPerPartition != null) { "maxContributionsPerPartition must be set for QUANTILES metric." } require(areMinMaxValuesSet) { "(minValue, maxValue) must be set for QUANTILES metric." } } +} + +private fun validateVectorFeature(params: AggregationParams, feature: VectorFeatureSpec) { // Validation for VECTOR_SUM metric. - if (metricIsRequested(VECTOR_SUM::class, params)) { - require(params.vectorNormKind != null) { "vectorNormKind must be set for VECTOR_SUM metric." } + if (metricIsRequested(VECTOR_SUM::class, feature.metrics)) { when (params.noiseKind) { NoiseKind.LAPLACE -> - require(params.vectorNormKind in listOf(NormKind.L_INF, NormKind.L1)) { - "vectorNormKind must be L_INF or L1 for LAPLACE noise. Provided value: ${params.vectorNormKind}." + require(feature.normKind in listOf(NormKind.L_INF, NormKind.L1)) { + "vectorNormKind must be L_INF or L1 for LAPLACE noise. Provided value: ${feature.normKind}." } NoiseKind.GAUSSIAN -> - require(params.vectorNormKind in listOf(NormKind.L_INF, NormKind.L2)) { - "vectorNormKind must be L_INF or L2 for GAUSSIAN noise. Provided value: ${params.vectorNormKind}." + require(feature.normKind in listOf(NormKind.L_INF, NormKind.L2)) { + "vectorNormKind must be L_INF or L2 for GAUSSIAN noise. Provided value: ${feature.normKind}." } } - require(params.vectorMaxTotalNorm != null) { - "vectorMaxTotalNorm must be set for VECTOR_SUM metric." - } - require(params.vectorSize != null) { "vectorSize must be set for VECTOR_SUM metric." } - - require( - !metricIsRequested(SUM::class, params) && - !metricIsRequested(MEAN::class, params) && - !metricIsRequested(VARIANCE::class, params) && - !metricIsRequested(QUANTILES::class, params) - ) { - "VECTOR_SUM can not be computed together with scalar metrics such as SUM, MEAN, VARIANCE and QUANTILES." - } - } - - // Partition selection - if (usePublicPartitions) { - require(params.partitionSelectionBudget == null) { - "partitionSelectionBudget can not be set for public partitions." - } - } - - // ValueExtractor: only COUNT and PRIVACY_ID_COUNT can be computed w/o a value extractor. - if (!hasValueExtractor) { - val metricsWhichRequireValueExtractor = - params.metrics.map { it.type }.filter { it != COUNT && it != PRIVACY_ID_COUNT } - require(metricsWhichRequireValueExtractor.isEmpty()) { - "Metrics $metricsWhichRequireValueExtractor require a value extractor." - } } } @@ -512,6 +511,69 @@ enum class ExecutionMode( ), } +/** + * Represents a feature for which DP metrics are calculated. + * + * A feature is a characteristic of the input data. For example, in a dataset of user activities, a + * feature could be "time spent on page" or "user embedding". This interface and its implementations + * are used to specify parameters for metrics calculated on these features. + */ +@Immutable +sealed interface FeatureSpec : Serializable { + /** A unique identifier for the feature. */ + val featureId: String + /** The list of DP metrics to be computed for this feature. */ + val metrics: ImmutableList +} + +/** + * A [FeatureSpec] for scalar-valued features. + * + * This is used for features where each data point is a single numerical value (e.g., a Double). It + * is suitable for metrics like [MetricType.SUM], [MetricType.MEAN], [MetricType.VARIANCE], and + * [MetricType.QUANTILES]. + * + * @property minValue The minimum value that a single contribution can take. + * @property maxValue The maximum value that a single contribution can take. + * @property minTotalValue The minimum total value that contributions from a single privacy unit can + * sum up to per partition. Must be set if [MetricType.SUM] is requested and neither + * [MetricType.MEAN] nor [MetricType.VARIANCE] is requested; otherwise, [minValue] and [maxValue] + * must be set. + * @property maxTotalValue The maximum total value that contributions from a single privacy unit can + * sum up to per partition. Must be set if [MetricType.SUM] is requested and neither + * [MetricType.MEAN] nor [MetricType.VARIANCE] is requested; otherwise, [minValue] and [maxValue] + * must be set. + */ +@Immutable +data class ScalarFeatureSpec( + override val featureId: String, + override val metrics: ImmutableList, + val minValue: Double? = null, + val maxValue: Double? = null, + val minTotalValue: Double? = null, + val maxTotalValue: Double? = null, +) : FeatureSpec, Serializable + +/** + * A [FeatureSpec] for vector-valued features. + * + * This is used for features where each data point is a vector of numerical values (e.g., an + * embedding). It is suitable for metrics like [MetricType.VECTOR_SUM]. + * + * @property vectorSize The size of the vector. + * @property normKind The type of norm to use for contribution bounding. + * @property vectorMaxTotalNorm The maximum total norm of contributions from a single privacy unit + * per partition. + */ +@Immutable +data class VectorFeatureSpec( + override val featureId: String, + override val metrics: ImmutableList, + val vectorSize: Int, + val normKind: NormKind, + val vectorMaxTotalNorm: Double, +) : FeatureSpec, Serializable + /** The definition of the DP metric to compute. */ @Immutable data class MetricDefinition( @@ -525,6 +587,8 @@ data class MetricDefinition( val budgetSpec: BudgetPerOpSpec? = null, ) : Serializable +// TODO: have 2 types of MetricType feature and non feature for better code +// readability and remove complicated checks. /** The types of metrics that can be anonymized. */ @Immutable sealed class MetricType : Serializable { @@ -570,8 +634,10 @@ private fun sameNullability(a: Double?, b: Double?): Boolean { return (a == null) == (b == null) } -private fun metricIsRequested(metricTypeClass: KClass, params: AggregationParams) = - params.metrics.any { metricTypeClass.isInstance(it.type) } +private fun metricIsRequested( + metricTypeClass: KClass, + metrics: Collection, +) = metrics.any { metricTypeClass.isInstance(it.type) } private fun isGreaterThanZeroIfSet(value: Int?): Boolean = value == null || value > 0 diff --git a/pipelinedp4j/main/com/google/privacy/differentialprivacy/pipelinedp4j/core/PartitionAndPerPartitionSampler.kt b/pipelinedp4j/main/com/google/privacy/differentialprivacy/pipelinedp4j/core/PartitionAndPerPartitionSampler.kt index bf77c187..33b69809 100644 --- a/pipelinedp4j/main/com/google/privacy/differentialprivacy/pipelinedp4j/core/PartitionAndPerPartitionSampler.kt +++ b/pipelinedp4j/main/com/google/privacy/differentialprivacy/pipelinedp4j/core/PartitionAndPerPartitionSampler.kt @@ -17,6 +17,7 @@ package com.google.privacy.differentialprivacy.pipelinedp4j.core import com.google.privacy.differentialprivacy.pipelinedp4j.proto.PrivacyIdContributions +import com.google.privacy.differentialprivacy.pipelinedp4j.proto.PrivacyIdContributionsKt.featureContribution import com.google.privacy.differentialprivacy.pipelinedp4j.proto.PrivacyIdContributionsKt.multiValueContribution import com.google.privacy.differentialprivacy.pipelinedp4j.proto.privacyIdContributions @@ -96,16 +97,24 @@ class PartitionAndPerPartitionSampler( */ private fun mergeContributions( partitionContributions: Iterable> -): PrivacyIdContributions = privacyIdContributions { - for (partitionContribution in partitionContributions) { - // TODO: Update to add support for multiple features. - // We expect that contribution contains only one feature with featureId="", - // produced by DataExtractors. - val perFeatureValues = partitionContribution.perFeatureValues().single() - if (perFeatureValues.values.size == 1) { - singleValueContributions += perFeatureValues.values - } else { - multiValueContributions += multiValueContribution { values += perFeatureValues.values } +): PrivacyIdContributions { + val allPerFeatureValues = partitionContributions.flatMap { it.perFeatureValues() } + val perFeatureValuesById = allPerFeatureValues.groupBy { it.featureId } + + return privacyIdContributions { + for ((featureId, values) in perFeatureValuesById) { + features += featureContribution { + this.featureId = featureId + for (perFeatureValue in values) { + if (perFeatureValue.values.size == 1) { + singleValueContributions += perFeatureValue.values + } else { + multiValueContributions += multiValueContribution { + this.values += perFeatureValue.values + } + } + } + } } } } diff --git a/pipelinedp4j/main/com/google/privacy/differentialprivacy/pipelinedp4j/proto/accumulators.proto b/pipelinedp4j/main/com/google/privacy/differentialprivacy/pipelinedp4j/proto/accumulators.proto index 804b861c..fffce225 100644 --- a/pipelinedp4j/main/com/google/privacy/differentialprivacy/pipelinedp4j/proto/accumulators.proto +++ b/pipelinedp4j/main/com/google/privacy/differentialprivacy/pipelinedp4j/proto/accumulators.proto @@ -29,34 +29,30 @@ message CountAccumulator { message SumAccumulator { double sum = 1; + string feature_id = 2; } message VectorSumAccumulator { repeated double sums_per_dimension = 1; + string feature_id = 2; } message MeanAccumulator { int64 count = 1; double normalized_sum = 2; + string feature_id = 3; } message QuantilesAccumulator { bytes serialized_quantiles_summary = 1; + string feature_id = 2; } message VarianceAccumulator { int64 count = 1; double normalized_sum = 2; double normalized_sum_squares = 3; -} - -message FeatureAccumulator { - string feature_id = 1; - SumAccumulator sum = 2; - MeanAccumulator mean = 3; - VarianceAccumulator variance = 4; - QuantilesAccumulator quantiles = 5; - VectorSumAccumulator vector_sum = 6; + string feature_id = 4; } // A collection of accumulators for all metrics that can possibly be computed. @@ -65,10 +61,9 @@ message FeatureAccumulator { message CompoundAccumulator { PrivacyIdCountAccumulator privacy_id_count_accumulator = 3; CountAccumulator count_accumulator = 1; - SumAccumulator sum_accumulator = 2; - VectorSumAccumulator vector_sum_accumulator = 7; - MeanAccumulator mean_accumulator = 4; - QuantilesAccumulator quantiles_accumulator = 5; - VarianceAccumulator variance_accumulator = 6; - repeated FeatureAccumulator feature_accumulators = 8; + repeated SumAccumulator sum_accumulators = 2; + repeated MeanAccumulator mean_accumulators = 4; + repeated VarianceAccumulator variance_accumulators = 5; + repeated QuantilesAccumulator quantiles_accumulators = 6; + repeated VectorSumAccumulator vector_sum_accumulators = 7; } diff --git a/pipelinedp4j/main/com/google/privacy/differentialprivacy/pipelinedp4j/proto/dpaggregates.proto b/pipelinedp4j/main/com/google/privacy/differentialprivacy/pipelinedp4j/proto/dpaggregates.proto index 17c03fad..05948ea6 100644 --- a/pipelinedp4j/main/com/google/privacy/differentialprivacy/pipelinedp4j/proto/dpaggregates.proto +++ b/pipelinedp4j/main/com/google/privacy/differentialprivacy/pipelinedp4j/proto/dpaggregates.proto @@ -39,12 +39,6 @@ message PerFeature { message DpAggregates { double privacy_id_count = 3; double count = 1; - double sum = 2; - repeated double vector_sum = 7; - double mean = 4; - // Sorted by rank. - repeated double quantiles = 5; - double variance = 6; repeated PerFeature per_feature = 8; } @@ -55,16 +49,14 @@ message PrivacyIdContributions { } message FeatureContribution { string feature_id = 1; + // Used for metrics like SUM where a single contribution has + // a single double value. repeated double single_value_contributions = 2; + // Used for metrics like VECTOR_SUM where a single contribution has + // multiple double values. repeated MultiValueContribution multi_value_contributions = 3; } - // Used for metrics like VECTOR_SUM where a single contribution has - // multiple double values. - repeated MultiValueContribution multi_value_contributions = 1; - // Used for metrics like SUM where a single contribution has - // a single double value. - repeated double single_value_contributions = 2; // User contributions per each feature. - repeated FeatureContribution features = 3; + repeated FeatureContribution features = 1; } diff --git a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/beam/BeamEncodersTest.kt b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/beam/BeamEncodersTest.kt index 7d6fdf95..eebdc2fd 100644 --- a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/beam/BeamEncodersTest.kt +++ b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/beam/BeamEncodersTest.kt @@ -98,12 +98,17 @@ class BeamEncodersTest { val input = listOf( compoundAccumulator { - sumAccumulator = sumAccumulator { sum = -123.0 } - meanAccumulator = meanAccumulator { + sumAccumulators += sumAccumulator { + featureId = "" + sum = -123.0 + } + meanAccumulators += meanAccumulator { + featureId = "" count = 12 normalizedSum = -1.543 } - quantilesAccumulator = quantilesAccumulator { + quantilesAccumulators += quantilesAccumulator { + featureId = "" serializedQuantilesSummary = ByteString.copyFrom(byteArrayOf(0x48, 0x65, 0x6c, 0x6c, 0x6f)) } diff --git a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/CompoundCombinerTest.kt b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/CompoundCombinerTest.kt index b532db6d..e1959fcc 100644 --- a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/CompoundCombinerTest.kt +++ b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/CompoundCombinerTest.kt @@ -25,10 +25,12 @@ import com.google.privacy.differentialprivacy.pipelinedp4j.core.MetricType.VARIA import com.google.privacy.differentialprivacy.pipelinedp4j.core.budget.AllocatedBudget import com.google.privacy.differentialprivacy.pipelinedp4j.dplibrary.NoiseFactory import com.google.privacy.differentialprivacy.pipelinedp4j.dplibrary.ZeroNoiseFactory +import com.google.privacy.differentialprivacy.pipelinedp4j.proto.PrivacyIdContributionsKt.featureContribution import com.google.privacy.differentialprivacy.pipelinedp4j.proto.compoundAccumulator import com.google.privacy.differentialprivacy.pipelinedp4j.proto.countAccumulator import com.google.privacy.differentialprivacy.pipelinedp4j.proto.dpAggregates import com.google.privacy.differentialprivacy.pipelinedp4j.proto.meanAccumulator +import com.google.privacy.differentialprivacy.pipelinedp4j.proto.perFeature import com.google.privacy.differentialprivacy.pipelinedp4j.proto.privacyIdContributions import com.google.privacy.differentialprivacy.pipelinedp4j.proto.privacyIdCountAccumulator import com.google.privacy.differentialprivacy.pipelinedp4j.proto.sumAccumulator @@ -42,37 +44,58 @@ import org.junit.runners.JUnit4 class CompoundCombinerTest { private val COUNT_PARAMS = AggregationParams( - metrics = ImmutableList.of(MetricDefinition(COUNT)), + nonFeatureMetrics = ImmutableList.of(MetricDefinition(COUNT)), noiseKind = NoiseKind.GAUSSIAN, maxPartitionsContributed = Int.MAX_VALUE, maxContributionsPerPartition = Int.MAX_VALUE, ) private val COUNT_AND_SUM_PARAMS = AggregationParams( - metrics = ImmutableList.of(MetricDefinition(COUNT), MetricDefinition(SUM)), + nonFeatureMetrics = ImmutableList.of(MetricDefinition(COUNT)), + features = + ImmutableList.of( + ScalarFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(SUM)), + minTotalValue = -Double.MAX_VALUE, + maxTotalValue = Double.MAX_VALUE, + ) + ), noiseKind = NoiseKind.GAUSSIAN, maxPartitionsContributed = Int.MAX_VALUE, maxContributionsPerPartition = Int.MAX_VALUE, - minTotalValue = -Double.MAX_VALUE, - maxTotalValue = Double.MAX_VALUE, ) private val COUNT_AND_MEAN_PARAMS = AggregationParams( - metrics = ImmutableList.of(MetricDefinition(COUNT), MetricDefinition(MEAN)), + nonFeatureMetrics = ImmutableList.of(MetricDefinition(COUNT)), + features = + ImmutableList.of( + ScalarFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(MEAN)), + minValue = -100.0, + maxValue = 100.0, + ) + ), noiseKind = NoiseKind.GAUSSIAN, maxPartitionsContributed = 100, maxContributionsPerPartition = 10, - minValue = -100.0, - maxValue = 100.0, ) private val COUNT_AND_VARIANCE_PARAMS = AggregationParams( - metrics = ImmutableList.of(MetricDefinition(COUNT), MetricDefinition(VARIANCE)), + nonFeatureMetrics = ImmutableList.of(MetricDefinition(COUNT)), + features = + ImmutableList.of( + ScalarFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(VARIANCE)), + minValue = -100.0, + maxValue = 100.0, + ) + ), noiseKind = NoiseKind.GAUSSIAN, maxPartitionsContributed = 100, maxContributionsPerPartition = 10, - minValue = -100.0, - maxValue = 100.0, ) private val UNUSED_ALLOCATED_BUDGET = AllocatedBudget() @@ -96,7 +119,12 @@ class CompoundCombinerTest { val accumulator = compoundCombiner.createAccumulator( - privacyIdContributions { singleValueContributions += listOf(10.0, 10.0, 10.0) } + privacyIdContributions { + features += featureContribution { + featureId = "value" + singleValueContributions += listOf(10.0, 10.0, 10.0) + } + } ) assertThat(accumulator) @@ -119,20 +147,29 @@ class CompoundCombinerTest { UNUSED_ALLOCATED_BUDGET, NoiseFactory(), ExecutionMode.PRODUCTION, + COUNT_AND_SUM_PARAMS.features[0] as ScalarFeatureSpec, ), ) ) val accumulator = compoundCombiner.createAccumulator( - privacyIdContributions { singleValueContributions += listOf(10.0, 10.0, 10.0) } + privacyIdContributions { + features += featureContribution { + featureId = "value" + singleValueContributions += listOf(10.0, 10.0, 10.0) + } + } ) assertThat(accumulator) .isEqualTo( compoundAccumulator { countAccumulator = countAccumulator { count = 3 } - sumAccumulator = sumAccumulator { sum = 30.0 } + sumAccumulators += sumAccumulator { + featureId = "value" + sum = 30.0 + } } ) } @@ -148,19 +185,26 @@ class CompoundCombinerTest { UNUSED_ALLOCATED_BUDGET, NoiseFactory(), ExecutionMode.PRODUCTION, + COUNT_AND_MEAN_PARAMS.features[0] as ScalarFeatureSpec, ) ) ) val accumulator = compoundCombiner.createAccumulator( - privacyIdContributions { singleValueContributions += listOf(5.0, 10.5, 19.0) } + privacyIdContributions { + features += featureContribution { + featureId = "value" + singleValueContributions += listOf(5.0, 10.5, 19.0) + } + } ) assertThat(accumulator) .isEqualTo( compoundAccumulator { - meanAccumulator = meanAccumulator { + meanAccumulators += meanAccumulator { + featureId = "value" count = 3 normalizedSum = 34.5 } @@ -174,25 +218,32 @@ class CompoundCombinerTest { CompoundCombiner( listOf( VarianceCombiner( - COUNT_AND_MEAN_PARAMS, + COUNT_AND_VARIANCE_PARAMS, UNUSED_ALLOCATED_BUDGET, UNUSED_ALLOCATED_BUDGET, UNUSED_ALLOCATED_BUDGET, NoiseFactory(), ExecutionMode.PRODUCTION, + COUNT_AND_VARIANCE_PARAMS.features[0] as ScalarFeatureSpec, ) ) ) val accumulator = compoundCombiner.createAccumulator( - privacyIdContributions { singleValueContributions += listOf(5.0, 10.5, 19.0) } + privacyIdContributions { + features += featureContribution { + featureId = "value" + singleValueContributions += listOf(5.0, 10.5, 19.0) + } + } ) assertThat(accumulator) .isEqualTo( compoundAccumulator { - varianceAccumulator = varianceAccumulator { + varianceAccumulators += varianceAccumulator { + featureId = "value" count = 3 normalizedSum = 34.5 normalizedSumSquares = 496.25 @@ -217,6 +268,7 @@ class CompoundCombinerTest { UNUSED_ALLOCATED_BUDGET, NoiseFactory(), ExecutionMode.PRODUCTION, + COUNT_AND_SUM_PARAMS.features[0] as ScalarFeatureSpec, ), ) ) @@ -225,11 +277,17 @@ class CompoundCombinerTest { compoundCombiner.mergeAccumulators( compoundAccumulator { countAccumulator = countAccumulator { count = 1 } - sumAccumulator = sumAccumulator { sum = 10.0 } + sumAccumulators += sumAccumulator { + featureId = "value" + sum = 10.0 + } }, compoundAccumulator { countAccumulator = countAccumulator { count = 2 } - sumAccumulator = sumAccumulator { sum = 20.0 } + sumAccumulators += sumAccumulator { + featureId = "value" + sum = 20.0 + } }, ) @@ -237,7 +295,10 @@ class CompoundCombinerTest { .isEqualTo( compoundAccumulator { countAccumulator = countAccumulator { count = 3 } - sumAccumulator = sumAccumulator { sum = 30.0 } + sumAccumulators += sumAccumulator { + featureId = "value" + sum = 30.0 + } } ) } @@ -277,6 +338,7 @@ class CompoundCombinerTest { UNUSED_ALLOCATED_BUDGET, NoiseFactory(), ExecutionMode.PRODUCTION, + COUNT_AND_MEAN_PARAMS.features[0] as ScalarFeatureSpec, ) ) ) @@ -284,13 +346,15 @@ class CompoundCombinerTest { val mergedAccumulator = compoundCombiner.mergeAccumulators( compoundAccumulator { - meanAccumulator = meanAccumulator { + meanAccumulators += meanAccumulator { + featureId = "value" count = 1 normalizedSum = 10.0 } }, compoundAccumulator { - meanAccumulator = meanAccumulator { + meanAccumulators += meanAccumulator { + featureId = "value" count = 2 normalizedSum = 20.0 } @@ -300,7 +364,8 @@ class CompoundCombinerTest { assertThat(mergedAccumulator) .isEqualTo( compoundAccumulator { - meanAccumulator = meanAccumulator { + meanAccumulators += meanAccumulator { + featureId = "value" count = 3 normalizedSum = 30.0 } @@ -320,6 +385,7 @@ class CompoundCombinerTest { UNUSED_ALLOCATED_BUDGET, NoiseFactory(), ExecutionMode.PRODUCTION, + COUNT_AND_VARIANCE_PARAMS.features[0] as ScalarFeatureSpec, ) ) ) @@ -327,14 +393,16 @@ class CompoundCombinerTest { val mergedAccumulator = compoundCombiner.mergeAccumulators( compoundAccumulator { - varianceAccumulator = varianceAccumulator { + varianceAccumulators += varianceAccumulator { + featureId = "value" count = 1 normalizedSum = 10.0 normalizedSumSquares = 100.0 } }, compoundAccumulator { - varianceAccumulator = varianceAccumulator { + varianceAccumulators += varianceAccumulator { + featureId = "value" count = 2 normalizedSum = 20.0 normalizedSumSquares = 200.0 @@ -345,7 +413,8 @@ class CompoundCombinerTest { assertThat(mergedAccumulator) .isEqualTo( compoundAccumulator { - varianceAccumulator = varianceAccumulator { + varianceAccumulators += varianceAccumulator { + featureId = "value" count = 3 normalizedSum = 30.0 normalizedSumSquares = 300.0 @@ -370,6 +439,7 @@ class CompoundCombinerTest { UNUSED_ALLOCATED_BUDGET, ZeroNoiseFactory(), ExecutionMode.PRODUCTION, + COUNT_AND_SUM_PARAMS.features[0] as ScalarFeatureSpec, ), ) ) @@ -378,7 +448,10 @@ class CompoundCombinerTest { compoundCombiner.computeMetrics( compoundAccumulator { countAccumulator = countAccumulator { count = 3 } - sumAccumulator = sumAccumulator { sum = 30.0 } + sumAccumulators += sumAccumulator { + featureId = "value" + sum = 30.0 + } } ) @@ -386,7 +459,10 @@ class CompoundCombinerTest { .isEqualTo( dpAggregates { count = 3.0 - sum = 30.0 + perFeature += perFeature { + sum = 30.0 + featureId = "value" + } } ) } @@ -415,58 +491,77 @@ class CompoundCombinerTest { @Test fun computeMetrics_meanCombiner_returnsMeanMetric() { - val compoundCombiner = + val params = COUNT_AND_MEAN_PARAMS.copy(nonFeatureMetrics = ImmutableList.of()) + val combiner = CompoundCombiner( listOf( MeanCombiner( - COUNT_AND_MEAN_PARAMS.copy(metrics = ImmutableList.of(MetricDefinition(MEAN))), + params, UNUSED_ALLOCATED_BUDGET, UNUSED_ALLOCATED_BUDGET, ZeroNoiseFactory(), ExecutionMode.PRODUCTION, + params.features[0] as ScalarFeatureSpec, ) ) ) val dpAggregates = - compoundCombiner.computeMetrics( + combiner.computeMetrics( compoundAccumulator { - meanAccumulator = meanAccumulator { + meanAccumulators += meanAccumulator { + featureId = "value" count = 3 normalizedSum = 30.0 } } ) - assertThat(dpAggregates).isEqualTo(dpAggregates { mean = 10.0 }) + assertThat(dpAggregates) + .isEqualTo( + dpAggregates { + perFeature += perFeature { + mean = 10.0 + featureId = "value" + } + } + ) } @Test fun computeMetrics_meanCombiner_returnsCountSumMean() { - val compoundCombiner = + val params = + COUNT_AND_MEAN_PARAMS.copy( + nonFeatureMetrics = ImmutableList.of(MetricDefinition(COUNT)), + features = + ImmutableList.of( + ScalarFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(MEAN), MetricDefinition(SUM)), + minValue = -100.0, + maxValue = 100.0, + ) + ), + ) + val combiner = CompoundCombiner( listOf( MeanCombiner( - COUNT_AND_MEAN_PARAMS.copy( - metrics = - ImmutableList.of( - MetricDefinition(MEAN), - MetricDefinition(COUNT), - MetricDefinition(SUM), - ) - ), + params, UNUSED_ALLOCATED_BUDGET, UNUSED_ALLOCATED_BUDGET, ZeroNoiseFactory(), ExecutionMode.PRODUCTION, + params.features[0] as ScalarFeatureSpec, ) ) ) val dpAggregates = - compoundCombiner.computeMetrics( + combiner.computeMetrics( compoundAccumulator { - meanAccumulator = meanAccumulator { + meanAccumulators += meanAccumulator { + featureId = "value" count = 3 normalizedSum = 30.0 } @@ -477,32 +572,38 @@ class CompoundCombinerTest { .isEqualTo( dpAggregates { count = 3.0 - sum = 30.0 - mean = 10.0 + perFeature += perFeature { + sum = 30.0 + mean = 10.0 + featureId = "value" + } } ) } @Test fun computeMetrics_varianceCombiner_returnsVarianceMetric() { - val compoundCombiner = + val params = COUNT_AND_VARIANCE_PARAMS.copy(nonFeatureMetrics = ImmutableList.of()) + val combiner = CompoundCombiner( listOf( VarianceCombiner( - COUNT_AND_VARIANCE_PARAMS.copy(metrics = ImmutableList.of(MetricDefinition(VARIANCE))), + params, UNUSED_ALLOCATED_BUDGET, UNUSED_ALLOCATED_BUDGET, UNUSED_ALLOCATED_BUDGET, ZeroNoiseFactory(), ExecutionMode.PRODUCTION, + params.features[0] as ScalarFeatureSpec, ) ) ) val dpAggregates = - compoundCombiner.computeMetrics( + combiner.computeMetrics( compoundAccumulator { - varianceAccumulator = varianceAccumulator { + varianceAccumulators += varianceAccumulator { + featureId = "value" count = 10 normalizedSum = 120.0 normalizedSumSquares = 1500.0 @@ -510,37 +611,57 @@ class CompoundCombinerTest { } ) - assertThat(dpAggregates).isEqualTo(dpAggregates { variance = 6.0 }) + assertThat(dpAggregates) + .isEqualTo( + dpAggregates { + perFeature += perFeature { + variance = 6.0 + featureId = "value" + } + } + ) } @Test fun computeMetrics_varianceCombiner_returnsCountSumMeanVariance() { - val compoundCombiner = - CompoundCombiner( - listOf( - VarianceCombiner( - COUNT_AND_VARIANCE_PARAMS.copy( + val params = + COUNT_AND_VARIANCE_PARAMS.copy( + nonFeatureMetrics = ImmutableList.of(MetricDefinition(COUNT)), + features = + ImmutableList.of( + ScalarFeatureSpec( + featureId = "value", metrics = ImmutableList.of( MetricDefinition(MEAN), - MetricDefinition(COUNT), MetricDefinition(SUM), MetricDefinition(VARIANCE), - ) - ), + ), + minValue = -100.0, + maxValue = 100.0, + ) + ), + ) + val combiner = + CompoundCombiner( + listOf( + VarianceCombiner( + params, UNUSED_ALLOCATED_BUDGET, UNUSED_ALLOCATED_BUDGET, UNUSED_ALLOCATED_BUDGET, ZeroNoiseFactory(), ExecutionMode.PRODUCTION, + params.features[0] as ScalarFeatureSpec, ) ) ) val dpAggregates = - compoundCombiner.computeMetrics( + combiner.computeMetrics( compoundAccumulator { - varianceAccumulator = varianceAccumulator { + varianceAccumulators += varianceAccumulator { + featureId = "value" count = 10 normalizedSum = 120.0 normalizedSumSquares = 1500.0 @@ -552,9 +673,12 @@ class CompoundCombinerTest { .isEqualTo( dpAggregates { count = 10.0 - sum = 120.0 - mean = 12.0 - variance = 6.0 + perFeature += perFeature { + sum = 120.0 + mean = 12.0 + variance = 6.0 + featureId = "value" + } } ) } @@ -565,7 +689,12 @@ class CompoundCombinerTest { val accumulator = compoundCombiner.createAccumulator( - privacyIdContributions { singleValueContributions += listOf(10.0, 5.0) } + privacyIdContributions { + features += featureContribution { + featureId = "value" + singleValueContributions += listOf(10.0, 5.0) + } + } ) assertThat(accumulator) diff --git a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/CountCombinerTest.kt b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/CountCombinerTest.kt index faff1082..e0d95652 100644 --- a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/CountCombinerTest.kt +++ b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/CountCombinerTest.kt @@ -27,6 +27,7 @@ import com.google.privacy.differentialprivacy.pipelinedp4j.core.NoiseKind.GAUSSI import com.google.privacy.differentialprivacy.pipelinedp4j.core.budget.AllocatedBudget import com.google.privacy.differentialprivacy.pipelinedp4j.dplibrary.NoiseFactory import com.google.privacy.differentialprivacy.pipelinedp4j.dplibrary.ZeroNoiseFactory +import com.google.privacy.differentialprivacy.pipelinedp4j.proto.PrivacyIdContributionsKt.featureContribution import com.google.privacy.differentialprivacy.pipelinedp4j.proto.PrivacyIdContributionsKt.multiValueContribution import com.google.privacy.differentialprivacy.pipelinedp4j.proto.countAccumulator import com.google.privacy.differentialprivacy.pipelinedp4j.proto.privacyIdContributions @@ -41,7 +42,7 @@ import org.mockito.kotlin.verify class CountCombinerTest { private val AGG_PARAMS = AggregationParams( - metrics = ImmutableList.of(MetricDefinition(COUNT)), + nonFeatureMetrics = ImmutableList.of(MetricDefinition(COUNT)), noiseKind = GAUSSIAN, maxPartitionsContributed = 3, maxContributionsPerPartition = 5, @@ -72,7 +73,12 @@ class CountCombinerTest { val accumulator = combiner.createAccumulator( - privacyIdContributions { singleValueContributions += listOf(1.0, 1.0, 1.0) } + privacyIdContributions { + features += featureContribution { + featureId = "value" + singleValueContributions += listOf(1.0, 1.0, 1.0) + } + } ) assertThat(accumulator).isEqualTo(countAccumulator { count = 3 }) @@ -86,12 +92,15 @@ class CountCombinerTest { val accumulator = combiner.createAccumulator( privacyIdContributions { - multiValueContributions += - listOf( - multiValueContribution { values += listOf(1.0, 1.0, 1.0) }, - multiValueContribution { values += listOf(2.0, 2.0, 2.0) }, - multiValueContribution { values += listOf(3.0, 3.0, 3.0) }, - ) + features += featureContribution { + featureId = "value" + multiValueContributions += + listOf( + multiValueContribution { values.addAll(listOf(1.0, 1.0, 1.0)) }, + multiValueContribution { values.addAll(listOf(2.0, 2.0, 2.0)) }, + multiValueContribution { values.addAll(listOf(3.0, 3.0, 3.0)) }, + ) + } } ) @@ -113,7 +122,12 @@ class CountCombinerTest { val accumulator = combiner.createAccumulator( - privacyIdContributions { singleValueContributions += listOf(1.0, 1.0, 1.0) } + privacyIdContributions { + features += featureContribution { + featureId = "value" + singleValueContributions += listOf(1.0, 1.0, 1.0) + } + } ) assertThat(accumulator).isEqualTo(countAccumulator { count = 2 }) @@ -134,7 +148,12 @@ class CountCombinerTest { val accumulator = combiner.createAccumulator( - privacyIdContributions { singleValueContributions += listOf(1.0, 1.0, 1.0) } + privacyIdContributions { + features += featureContribution { + featureId = "value" + singleValueContributions += listOf(1.0, 1.0, 1.0) + } + } ) assertThat(accumulator).isEqualTo(countAccumulator { count = 3 }) @@ -156,7 +175,7 @@ class CountCombinerTest { fun computeMetrics_addsNoise(noiseKind: NoiseKind, delta: Double) { val paramsWithNoise = AggregationParams( - metrics = ImmutableList.of(MetricDefinition(COUNT)), + nonFeatureMetrics = ImmutableList.of(MetricDefinition(COUNT)), noiseKind = noiseKind, maxPartitionsContributed = 30, maxContributionsPerPartition = 50, @@ -175,7 +194,7 @@ class CountCombinerTest { fun computeMetrics_passesCorrectParametersToNoise() { val params = AggregationParams( - metrics = ImmutableList.of(MetricDefinition(COUNT)), + nonFeatureMetrics = ImmutableList.of(MetricDefinition(COUNT)), noiseKind = GAUSSIAN, maxPartitionsContributed = 3, maxContributionsPerPartition = 5, @@ -207,10 +226,22 @@ class CountCombinerTest { val accumulator0 = combiner.emptyAccumulator() val accumulator1 = combiner.createAccumulator( - privacyIdContributions { singleValueContributions += listOf(0.0, 0.0) } + privacyIdContributions { + features += featureContribution { + featureId = "value" + singleValueContributions += listOf(0.0, 0.0) + } + } ) val accumulator2 = - combiner.createAccumulator(privacyIdContributions { singleValueContributions += listOf(0.0) }) + combiner.createAccumulator( + privacyIdContributions { + features += featureContribution { + featureId = "value" + singleValueContributions += listOf(0.0) + } + } + ) val accumulator3 = combiner.mergeAccumulators(accumulator0, accumulator1) val finalAccumulator = combiner.mergeAccumulators(accumulator2, accumulator3) val result = combiner.computeMetrics(finalAccumulator) diff --git a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/DpEngineTest.kt b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/DpEngineTest.kt index 5753b863..1de92799 100644 --- a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/DpEngineTest.kt +++ b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/DpEngineTest.kt @@ -44,6 +44,7 @@ import com.google.privacy.differentialprivacy.pipelinedp4j.local.LocalTable import com.google.privacy.differentialprivacy.pipelinedp4j.local.createLocalEngine import com.google.privacy.differentialprivacy.pipelinedp4j.proto.DpAggregates import com.google.privacy.differentialprivacy.pipelinedp4j.proto.dpAggregates +import com.google.privacy.differentialprivacy.pipelinedp4j.proto.perFeature import com.google.testing.junit.testparameterinjector.TestParameter import com.google.testing.junit.testparameterinjector.TestParameterInjector import com.google.testing.junit.testparameterinjector.TestParameters @@ -104,12 +105,14 @@ class DpEngineTest { .aggregate( LocalCollection(sequenceOf()), // empty metrics are not allowed - COUNT_PARAMS.copy(metrics = ImmutableList.of()), + COUNT_PARAMS.copy(nonFeatureMetrics = ImmutableList.of()), testDataExtractors, LocalCollection(sequenceOf()), ) } - assertThat(e).hasMessageThat().contains("metrics must not be empty") + assertThat(e) + .hasMessageThat() + .contains("At least one of nonFeatureMetrics or features must be specified.") } @Test @@ -188,7 +191,17 @@ class DpEngineTest { ) val publicPartitions = LocalCollection(sequenceOf("US")) val dpEngine = DpEngine.createForTesting(LOCAL_EF, LARGE_BUDGET_SPEC, ZeroNoiseFactory()) - val params = COUNT_AND_SUM_PARAMS.copy(maxContributionsPerPartition = 2, maxTotalValue = 30.0) + val params = + COUNT_AND_SUM_PARAMS.copy( + maxContributionsPerPartition = 2, + features = + ImmutableList.of( + (COUNT_AND_SUM_PARAMS.features[0] as ScalarFeatureSpec).copy( + minTotalValue = -Double.MAX_VALUE, + maxTotalValue = 30.0, + ) + ), + ) val dpAggregates = dpEngine.aggregate(inputData, params, testDataExtractors, publicPartitions) @@ -201,7 +214,10 @@ class DpEngineTest { "US", dpAggregates { count = 2.0 - sum = 30.0 + perFeature += perFeature { + sum = 30.0 + featureId = "value" + } }, ) ) @@ -220,7 +236,16 @@ class DpEngineTest { ) val publicPartitions = LocalCollection(sequenceOf("US", "NL")) val dpEngine = DpEngine.createForTesting(LOCAL_EF, LARGE_BUDGET_SPEC, ZeroNoiseFactory()) - val params = COUNT_AND_SUM_PARAMS.copy(minTotalValue = -25.0, maxTotalValue = 25.0) + val params = + COUNT_AND_SUM_PARAMS.copy( + features = + ImmutableList.of( + (COUNT_AND_SUM_PARAMS.features[0] as ScalarFeatureSpec).copy( + minTotalValue = -25.0, + maxTotalValue = 25.0, + ) + ) + ) val dpAggregates = dpEngine.aggregate(inputData, params, testDataExtractors, publicPartitions) @@ -233,14 +258,20 @@ class DpEngineTest { "US", dpAggregates { count = 2.0 - sum = 25.0 + perFeature += perFeature { + sum = 25.0 + featureId = "value" + } }, ), Pair( "NL", dpAggregates { count = 2.0 - sum = -25.0 + perFeature += perFeature { + sum = -25.0 + featureId = "value" + } }, ), ) @@ -255,17 +286,23 @@ class DpEngineTest { // Use low bounds to avoid sensitivity overflow when adding noise. val params = AggregationParams( - metrics = + nonFeatureMetrics = ImmutableList.of( MetricDefinition(COUNT, AbsoluteBudgetPerOpSpec(0.1, 1e-5)), - MetricDefinition(SUM, AbsoluteBudgetPerOpSpec(0.1, 1e-5)), MetricDefinition(PRIVACY_ID_COUNT, AbsoluteBudgetPerOpSpec(0.1, 1e-5)), ), noiseKind = GAUSSIAN, maxPartitionsContributed = 5, maxContributionsPerPartition = 5, - minTotalValue = -5.0, - maxTotalValue = 5.0, + features = + ImmutableList.of( + ScalarFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(SUM, AbsoluteBudgetPerOpSpec(0.1, 1e-5))), + minTotalValue = -5.0, + maxTotalValue = 5.0, + ) + ), ) val dpAggregates = @@ -278,8 +315,8 @@ class DpEngineTest { assertThat(dpAggregates.data.toMap()["US"]!!.count) .isNotEqualTo(dpAggregatesAnotherRun.data.toMap()["US"]!!.count) - assertThat(dpAggregates.data.toMap()["US"]!!.sum) - .isNotEqualTo(dpAggregatesAnotherRun.data.toMap()["US"]!!.sum) + assertThat(dpAggregates.data.toMap()["US"]!!.perFeatureList.first().sum) + .isNotEqualTo(dpAggregatesAnotherRun.data.toMap()["US"]!!.perFeatureList.first().sum) assertThat(dpAggregates.data.toMap()["US"]!!.privacyIdCount) .isNotEqualTo(dpAggregatesAnotherRun.data.toMap()["US"]!!.privacyIdCount) } @@ -293,13 +330,19 @@ class DpEngineTest { // Use low bounds to avoid sensitivity overflow when adding noise. val params = AggregationParams( - metrics = - ImmutableList.of(MetricDefinition(COUNT), MetricDefinition(SUM), MetricDefinition(MEAN)), + nonFeatureMetrics = ImmutableList.of(MetricDefinition(COUNT)), + features = + ImmutableList.of( + ScalarFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(SUM), MetricDefinition(MEAN)), + minValue = -2.0, + maxValue = 2.0, + ) + ), noiseKind = LAPLACE, maxPartitionsContributed = 1, maxContributionsPerPartition = 1, - minValue = -2.0, - maxValue = 2.0, ) val dpAggregates = @@ -308,9 +351,12 @@ class DpEngineTest { dpEngine.done() val partitionResult = dpAggregates.data.toMap()["US"]!! + val perFeatureResult = partitionResult.perFeatureList.first() assertThat(partitionResult.count).isWithin(1e-1).of(2.0) - assertThat(partitionResult.sum).isWithin(1e-1).of(3.0) - assertThat(partitionResult.mean).isWithin(1e-10).of(partitionResult.sum / partitionResult.count) + assertThat(perFeatureResult.sum).isWithin(1e-1).of(3.0) + assertThat(perFeatureResult.mean) + .isWithin(1e-10) + .of(perFeatureResult.sum / partitionResult.count) } @Test @@ -322,18 +368,24 @@ class DpEngineTest { // Use low bounds to avoid sensitivity overflow when adding noise. val params = AggregationParams( - metrics = + nonFeatureMetrics = ImmutableList.of(MetricDefinition(COUNT)), + features = ImmutableList.of( - MetricDefinition(COUNT), - MetricDefinition(SUM), - MetricDefinition(MEAN), - MetricDefinition(VARIANCE), + ScalarFeatureSpec( + featureId = "value", + metrics = + ImmutableList.of( + MetricDefinition(SUM), + MetricDefinition(MEAN), + MetricDefinition(VARIANCE), + ), + minValue = -2.0, + maxValue = 2.0, + ) ), noiseKind = LAPLACE, maxPartitionsContributed = 1, maxContributionsPerPartition = 1, - minValue = -2.0, - maxValue = 2.0, ) val dpAggregates = dpEngine.aggregate(inputData, params, testDataExtractors, publicPartitions) @@ -341,10 +393,13 @@ class DpEngineTest { dpEngine.done() val partitionResult = dpAggregates.data.toMap()["US"]!! + val perFeatureResult = partitionResult.perFeatureList.first() assertThat(partitionResult.count).isWithin(1e-1).of(2.0) - assertThat(partitionResult.sum).isWithin(1e-1).of(3.0) - assertThat(partitionResult.mean).isWithin(1e-10).of(partitionResult.sum / partitionResult.count) - assertThat(partitionResult.variance) + assertThat(perFeatureResult.sum).isWithin(1e-1).of(3.0) + assertThat(perFeatureResult.mean) + .isWithin(1e-10) + .of(perFeatureResult.sum / partitionResult.count) + assertThat(perFeatureResult.variance) .isWithin(1e-1) .of(((1.0 * 1.0) + (2.0 * 2.0)) / 2.0 - (3.0 / 2.0) * (3.0 / 2.0)) } @@ -426,18 +481,24 @@ class DpEngineTest { AggregationParams( contributionBoundingLevel = DATASET_LEVEL, noiseKind = GAUSSIAN, - metrics = + nonFeatureMetrics = + ImmutableList.of(MetricDefinition(PRIVACY_ID_COUNT), MetricDefinition(COUNT)), + features = ImmutableList.of( - MetricDefinition(PRIVACY_ID_COUNT), - MetricDefinition(COUNT), - MetricDefinition(SUM), - MetricDefinition(MEAN), - MetricDefinition(QUANTILES(ranks = ImmutableList.of())), + ScalarFeatureSpec( + featureId = "value", + metrics = + ImmutableList.of( + MetricDefinition(SUM), + MetricDefinition(MEAN), + MetricDefinition(QUANTILES(ranks = ImmutableList.of())), + ), + minValue = -10.0, + maxValue = 10.0, + ) ), maxPartitionsContributed = 10, maxContributionsPerPartition = 20, - minValue = -10.0, - maxValue = 10.0, ), PartitionAndPerPartitionSampler::class.java, ), @@ -471,18 +532,24 @@ class DpEngineTest { AggregationParams( contributionBoundingLevel = PARTITION_LEVEL, noiseKind = GAUSSIAN, - metrics = + nonFeatureMetrics = + ImmutableList.of(MetricDefinition(PRIVACY_ID_COUNT), MetricDefinition(COUNT)), + features = ImmutableList.of( - MetricDefinition(PRIVACY_ID_COUNT), - MetricDefinition(COUNT), - MetricDefinition(SUM), - MetricDefinition(MEAN), - MetricDefinition(QUANTILES(ranks = ImmutableList.of())), + ScalarFeatureSpec( + featureId = "value", + metrics = + ImmutableList.of( + MetricDefinition(SUM), + MetricDefinition(MEAN), + MetricDefinition(QUANTILES(ranks = ImmutableList.of())), + ), + minValue = -10.0, + maxValue = 10.0, + ) ), maxPartitionsContributed = 1, maxContributionsPerPartition = 20, - minValue = -10.0, - maxValue = 10.0, ), PerPartitionContributionsSampler::class.java, ), @@ -890,18 +957,22 @@ class DpEngineTest { // Use low bounds to avoid sensitivity overflow when adding noise. val params = AggregationParams( - metrics = + nonFeatureMetrics = + ImmutableList.of(MetricDefinition(COUNT, testCase.requestedCountBudget)), + features = ImmutableList.of( - MetricDefinition(COUNT, testCase.requestedCountBudget), - MetricDefinition(SUM, testCase.requestedSumBudget), + ScalarFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(SUM, testCase.requestedSumBudget)), + minTotalValue = -100.0, + maxTotalValue = 100.0, + ) ), noiseKind = GAUSSIAN, // Choose large values to avoid contribution clamping but keep the values low enough to // avoid sensitivity overflow. maxPartitionsContributed = 100, maxContributionsPerPartition = 100, - minTotalValue = -100.0, - maxTotalValue = 100.0, ) val result = @@ -1032,17 +1103,24 @@ class DpEngineTest { // Use low bounds to avoid sensitivity overflow when adding noise. val params = AggregationParams( - metrics = + nonFeatureMetrics = + ImmutableList.of(MetricDefinition(COUNT, testCase.requestedCountBudget)), + features = ImmutableList.of( - MetricDefinition(COUNT, testCase.requestedCountBudget), - MetricDefinition(SUM, testCase.requestedSumBudget), - MetricDefinition(MEAN, testCase.requestedMeanBudget), + ScalarFeatureSpec( + featureId = "value", + metrics = + ImmutableList.of( + MetricDefinition(SUM, testCase.requestedSumBudget), + MetricDefinition(MEAN, testCase.requestedMeanBudget), + ), + minValue = -10.0, + maxValue = 10.0, + ) ), noiseKind = GAUSSIAN, maxPartitionsContributed = 10, maxContributionsPerPartition = 20, - minValue = -10.0, - maxValue = 10.0, ) val result = @@ -1133,12 +1211,20 @@ class DpEngineTest { // Use low bounds to avoid sensitivity overflow when adding noise. val params = AggregationParams( - metrics = ImmutableList.of(MetricDefinition(VARIANCE, testCase.requestedVarianceBudget)), + nonFeatureMetrics = ImmutableList.of(), + features = + ImmutableList.of( + ScalarFeatureSpec( + featureId = "value", + metrics = + ImmutableList.of(MetricDefinition(VARIANCE, testCase.requestedVarianceBudget)), + minValue = -10.0, + maxValue = 10.0, + ) + ), noiseKind = GAUSSIAN, maxPartitionsContributed = 10, maxContributionsPerPartition = 20, - minValue = -10.0, - maxValue = 10.0, ) val result = @@ -1189,57 +1275,87 @@ class DpEngineTest { DpEngineBudgetSpec(budget = TotalBudget(epsilon = 2000.0, delta = 0.999999)) private val PRIVACY_ID_COUNT_PARAMS = AggregationParams( - metrics = ImmutableList.of(MetricDefinition(PRIVACY_ID_COUNT)), + nonFeatureMetrics = ImmutableList.of(MetricDefinition(PRIVACY_ID_COUNT)), noiseKind = GAUSSIAN, maxPartitionsContributed = 1_000_000, maxContributionsPerPartition = 1_000_000, ) private val COUNT_PARAMS = AggregationParams( - metrics = ImmutableList.of(MetricDefinition(COUNT)), + nonFeatureMetrics = ImmutableList.of(MetricDefinition(COUNT)), noiseKind = GAUSSIAN, maxPartitionsContributed = 1_000_000, maxContributionsPerPartition = 1_000_000, ) private val SUM_PARAMS = AggregationParams( - metrics = ImmutableList.of(MetricDefinition(SUM)), + nonFeatureMetrics = ImmutableList.of(), + features = + ImmutableList.of( + ScalarFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(SUM)), + minTotalValue = -Double.MAX_VALUE, + maxTotalValue = Double.MAX_VALUE, + ) + ), noiseKind = GAUSSIAN, maxPartitionsContributed = 1_000_000, - minTotalValue = -Double.MAX_VALUE, - maxTotalValue = Double.MAX_VALUE, ) private val COUNT_AND_SUM_PARAMS = AggregationParams( - metrics = ImmutableList.of(MetricDefinition(COUNT), MetricDefinition(SUM)), + nonFeatureMetrics = ImmutableList.of(MetricDefinition(COUNT)), + features = + ImmutableList.of( + ScalarFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(SUM)), + minTotalValue = -100.0, + maxTotalValue = 100.0, + ) + ), noiseKind = GAUSSIAN, // Choose large values to avoid contribution clamping but keep the values low enough to // avoid sensitivity overflow. maxPartitionsContributed = 100, maxContributionsPerPartition = 100, - minTotalValue = -100.0, - maxTotalValue = 100.0, ) private val MEAN_PARAMS = AggregationParams( - metrics = ImmutableList.of(MetricDefinition(MEAN)), + nonFeatureMetrics = ImmutableList.of(), + features = + ImmutableList.of( + ScalarFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(MEAN)), + minValue = -10.0, + maxValue = 10.0, + ) + ), noiseKind = GAUSSIAN, maxPartitionsContributed = 10, maxContributionsPerPartition = 20, - minValue = -10.0, - maxValue = 10.0, ) private val QUANTILES_PARAMS = AggregationParams( - metrics = + nonFeatureMetrics = ImmutableList.of(), + features = ImmutableList.of( - MetricDefinition(QUANTILES(ranks = ImmutableList.of(0.0001, 0.0, 0.5, 0.999, 1.0))) + ScalarFeatureSpec( + featureId = "value", + metrics = + ImmutableList.of( + MetricDefinition( + QUANTILES(ranks = ImmutableList.of(0.0001, 0.0, 0.5, 0.999, 1.0)) + ) + ), + minValue = -10.0, + maxValue = 10.0, + ) ), noiseKind = GAUSSIAN, maxPartitionsContributed = 10, maxContributionsPerPartition = 20, - minValue = -10.0, - maxValue = 10.0, ) private val LOCAL_EF = LocalEncoderFactory() } diff --git a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/DpFunctionsParamsTest.kt b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/DpFunctionsParamsTest.kt index 7bc5f472..1c33b01c 100644 --- a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/DpFunctionsParamsTest.kt +++ b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/DpFunctionsParamsTest.kt @@ -56,12 +56,16 @@ class DpFunctionsParamsTest { ) validateAggregationParams( AGGREGATION_PARAMS.copy( + nonFeatureMetrics = ImmutableList.of(), maxContributionsPerPartition = null, - metrics = ImmutableList.of(MetricDefinition(SUM)), - minValue = null, - maxValue = null, - minTotalValue = 1.0, - maxTotalValue = 2.0, + features = + ImmutableList.of( + createScalarFeatureSpec( + nonFeatureMetrics = ImmutableList.of(MetricDefinition(SUM)), + minTotalValue = 1.0, + maxTotalValue = 2.0, + ) + ), ), usePublicPartitions = true, hasValueExtractor = true, @@ -69,15 +73,17 @@ class DpFunctionsParamsTest { validateAggregationParams( AGGREGATION_PARAMS.copy( maxContributionsPerPartition = null, - metrics = ImmutableList.of(MetricDefinition(VECTOR_SUM)), + nonFeatureMetrics = ImmutableList.of(), + features = + ImmutableList.of( + createVectorFeatureSpec( + nonFeatureMetrics = ImmutableList.of(MetricDefinition(VECTOR_SUM)), + vectorSize = 2, + vectorMaxTotalNorm = 1.0, + normKind = NormKind.L1, + ) + ), noiseKind = NoiseKind.LAPLACE, - minValue = null, - maxValue = null, - minTotalValue = null, - maxTotalValue = null, - vectorSize = 2, - vectorMaxTotalNorm = 1.0, - vectorNormKind = NormKind.L1, ), usePublicPartitions = false, hasValueExtractor = true, @@ -94,6 +100,11 @@ class DpFunctionsParamsTest { aggregationParams = AGGREGATION_PARAMS.copy(maxPartitionsContributed = 0), exceptionMessage = "maxPartitionsContributed must be positive. Provided value: 0.", ), + OVER_LIMIT_MAX_PARTITIONS_CONTRIBUTED( + aggregationParams = AGGREGATION_PARAMS.copy(maxPartitionsContributed = 110_000_000), + exceptionMessage = + "maxPartitionsContributed must be less than 100000000 Provided values: maxPartitionsContributed=110000000", + ), PARTITION_LEVEL_CONTRIBUTION_BOUNDING_MAX_PARTITIONS_CONTRIBUTED( aggregationParams = AGGREGATION_PARAMS.copy( @@ -118,8 +129,33 @@ class DpFunctionsParamsTest { exceptionMessage = "preThreshold must be positive. Provided value: 0", ), NO_METRICS( - aggregationParams = AGGREGATION_PARAMS.copy(metrics = ImmutableList.of()), - exceptionMessage = "metrics must not be empty.", + aggregationParams = + AGGREGATION_PARAMS.copy( + nonFeatureMetrics = ImmutableList.of(), + features = ImmutableList.of(), + ), + exceptionMessage = "At least one of nonFeatureMetrics or features must be specified.", + ), + INVALID_NON_FEATURE_METRIC( + aggregationParams = + AGGREGATION_PARAMS.copy( + nonFeatureMetrics = ImmutableList.of(MetricDefinition(SUM)), + features = ImmutableList.of(), + ), + exceptionMessage = + "Only COUNT and PRIVACY_ID_COUNT are allowed in AggregationParams.nonFeatureMetrics. Other metrics should be provided via AggregationParams.features.", + ), + FEATURE_METRIC_IS_COUNT( + aggregationParams = + AGGREGATION_PARAMS.copy( + nonFeatureMetrics = ImmutableList.of(), + features = + ImmutableList.of( + createScalarFeatureSpec(nonFeatureMetrics = ImmutableList.of(MetricDefinition(COUNT))) + ), + ), + exceptionMessage = + "COUNT and PRIVACY_ID_COUNT are not allowed in features. They should be provided via AggregationParams.nonFeatureMetrics.", ), ZERO_MAX_CONTRIBUTIONS_PER_PARTITION( aggregationParams = AGGREGATION_PARAMS.copy(maxContributionsPerPartition = 0), @@ -180,90 +216,132 @@ class DpFunctionsParamsTest { "maxContributions and maxPartitionsContributed are mutually exclusive. Provided values: maxContributions=1, maxPartitionsContributed=1", ), MIN_VALUE_SET_MAX_VALUE_NOT_SET( - aggregationParams = AGGREGATION_PARAMS.copy(minValue = 1.0, maxValue = null), + aggregationParams = + AGGREGATION_PARAMS.copy( + features = ImmutableList.of(createScalarFeatureSpec(minValue = 1.0, maxValue = null)) + ), exceptionMessage = "minValue and maxValue must be simultaneously equal or not equal to null.", ), MIN_VALUE_NOT_SET_MAX_VALUE_SET( - aggregationParams = AGGREGATION_PARAMS.copy(minValue = null, maxValue = 2.0), + aggregationParams = + AGGREGATION_PARAMS.copy( + features = ImmutableList.of(createScalarFeatureSpec(minValue = null, maxValue = 2.0)) + ), exceptionMessage = "minValue and maxValue must be simultaneously equal or not equal to " + "null. Provided values: minValue=null, maxValue=2.0", ), MIN_VALUE_GREATER_THAN_MAX_VALUE( - aggregationParams = AGGREGATION_PARAMS.copy(minValue = 1.5, maxValue = 1.0), + aggregationParams = + AGGREGATION_PARAMS.copy( + features = ImmutableList.of(createScalarFeatureSpec(minValue = 1.5, maxValue = 1.0)) + ), exceptionMessage = "minValue must be less than maxValue. Provided values: " + "minValue=1.5, maxValue=1.0", ), MIN_VALUE_IS_EQUAL_TO_MAX_VALUE( - aggregationParams = AGGREGATION_PARAMS.copy(minValue = 1.5, maxValue = 1.5), + aggregationParams = + AGGREGATION_PARAMS.copy( + features = ImmutableList.of(createScalarFeatureSpec(minValue = 1.5, maxValue = 1.5)) + ), exceptionMessage = "minValue must be less than maxValue. Provided values: " + "minValue=1.5, maxValue=1.5", ), MIN_TOTAL_VALUE_SET_MAX_TOTAL_VALUE_NOT_SET( - aggregationParams = AGGREGATION_PARAMS.copy(minTotalValue = 1.0, maxTotalValue = null), + aggregationParams = + AGGREGATION_PARAMS.copy( + features = + ImmutableList.of(createScalarFeatureSpec(minTotalValue = 1.0, maxTotalValue = null)) + ), exceptionMessage = "minTotalValue and maxTotalValue must be simultaneously equal or not equal to null. " + "Provided values: minTotalValue=1.0, maxTotalValue=null", ), MIN_TOTAL_VALUE_NOT_SET_MAX_TOTAL_VALUE_SET( - aggregationParams = AGGREGATION_PARAMS.copy(minTotalValue = null, maxTotalValue = 2.0), + aggregationParams = + AGGREGATION_PARAMS.copy( + features = + ImmutableList.of(createScalarFeatureSpec(minTotalValue = null, maxTotalValue = 2.0)) + ), exceptionMessage = "minTotalValue and maxTotalValue must be simultaneously equal or not equal to null.", ), MIN_TOTAL_VALUE_GREATER_THAN_MAX_TOTAL_VALUE( - aggregationParams = AGGREGATION_PARAMS.copy(minTotalValue = 2.0, maxTotalValue = 0.0), + aggregationParams = + AGGREGATION_PARAMS.copy( + features = + ImmutableList.of(createScalarFeatureSpec(minTotalValue = 2.0, maxTotalValue = 0.0)) + ), exceptionMessage = "minTotalValue must be less or equal to maxTotalValue. Provided values: " + "minTotalValue=2.0, maxTotalValue=0.0", ), - MEAN_WITH_TOTAL_VALUE( + SCALAR_FEATURE_WITHOUT_BOUNDS( aggregationParams = AGGREGATION_PARAMS.copy( - metrics = ImmutableList.of(MetricDefinition(SUM), MetricDefinition(MEAN)), - minValue = 0.0, - maxValue = 3.0, - minTotalValue = 1.5, - maxTotalValue = 5.0, + nonFeatureMetrics = ImmutableList.of(), + features = + ImmutableList.of( + createScalarFeatureSpec( + nonFeatureMetrics = ImmutableList.of(MetricDefinition(SUM)), + minValue = null, + maxValue = null, + minTotalValue = null, + maxTotalValue = null, + ) + ), ), - exceptionMessage = - "(minTotalValue, maxTotalValue) should not be set if MEAN metric is requested", + exceptionMessage = "(minTotalValue, maxTotalValue) must be set for SUM metrics.", ), - VARIANCE_WITH_TOTAL_VALUE( + MEAN_WITH_TOTAL_VALUE( aggregationParams = AGGREGATION_PARAMS.copy( - metrics = ImmutableList.of(MetricDefinition(SUM), MetricDefinition(VARIANCE)), - minValue = 0.0, - maxValue = 3.0, - minTotalValue = 1.5, - maxTotalValue = 5.0, + features = + ImmutableList.of( + createScalarFeatureSpec( + nonFeatureMetrics = ImmutableList.of(MetricDefinition(SUM), MetricDefinition(MEAN)), + minValue = 0.0, + maxValue = 3.0, + minTotalValue = 1.5, + maxTotalValue = 5.0, + ) + ) ), exceptionMessage = - "(minTotalValue, maxTotalValue) should not be set if VARIANCE metric is requested", + "(minTotalValue, maxTotalValue) should not be set if MEAN metric is requested", ), - MAX_CONTRIBUTIONS_PER_PARTITION_MAX_CONTRIBUTIONS_NOT_SET_FOR_COUNT( + VARIANCE_WITH_TOTAL_VALUE( aggregationParams = AGGREGATION_PARAMS.copy( - metrics = ImmutableList.of(MetricDefinition(COUNT), MetricDefinition(SUM)), - maxContributionsPerPartition = null, - maxContributions = null, - minTotalValue = -1.0, - maxTotalValue = 1.0, - minValue = null, - maxValue = null, + features = + ImmutableList.of( + createScalarFeatureSpec( + nonFeatureMetrics = + ImmutableList.of(MetricDefinition(SUM), MetricDefinition(VARIANCE)), + minValue = 0.0, + maxValue = 3.0, + minTotalValue = 1.5, + maxTotalValue = 5.0, + ) + ) ), exceptionMessage = - "maxContributionsPerPartition or maxContributions must be set for COUNT metric.", + "(minTotalValue, maxTotalValue) should not be set if VARIANCE metric is requested", ), MAX_CONTRIBUTIONS_PER_PARTITION_MAX_CONTRIBUTIONS_NOT_SET_FOR_MEAN( aggregationParams = AGGREGATION_PARAMS.copy( - metrics = ImmutableList.of(MetricDefinition(MEAN), MetricDefinition(SUM)), + nonFeatureMetrics = ImmutableList.of(), + features = + ImmutableList.of( + createScalarFeatureSpec( + nonFeatureMetrics = ImmutableList.of(MetricDefinition(MEAN), MetricDefinition(SUM)), + minTotalValue = -1.0, + maxTotalValue = 1.0, + ) + ), maxContributionsPerPartition = null, maxContributions = null, - minTotalValue = -1.0, - maxTotalValue = 1.0, - minValue = null, - maxValue = null, ), exceptionMessage = "maxContributionsPerPartition or maxContributions must be set for MEAN metric.", @@ -271,44 +349,59 @@ class DpFunctionsParamsTest { MAX_CONTRIBUTIONS_PER_PARTITION_MAX_CONTRIBUTIONS_NOT_SET_FOR_QUANTILES( aggregationParams = AGGREGATION_PARAMS.copy( - metrics = + nonFeatureMetrics = ImmutableList.of(), + features = ImmutableList.of( - MetricDefinition(QUANTILES(ranks = ImmutableList.of())), - MetricDefinition(SUM), + createScalarFeatureSpec( + nonFeatureMetrics = + ImmutableList.of( + MetricDefinition(QUANTILES(ranks = ImmutableList.of())), + MetricDefinition(SUM), + ), + minTotalValue = -1.0, + maxTotalValue = 1.0, + ) ), maxContributionsPerPartition = null, - minTotalValue = -1.0, - maxTotalValue = 1.0, - minValue = null, - maxValue = null, ), exceptionMessage = "maxContributionsPerPartition must be set for QUANTILES metric.", ), MIN_TOTAL_VALUE_NOT_SET_FOR_SUM( aggregationParams = AGGREGATION_PARAMS.copy( - metrics = ImmutableList.of(MetricDefinition(SUM)), - minTotalValue = null, - maxTotalValue = null, + features = + ImmutableList.of( + createScalarFeatureSpec(nonFeatureMetrics = ImmutableList.of(MetricDefinition(SUM))) + ) ), exceptionMessage = "(minTotalValue, maxTotalValue) must be set for SUM metrics.", ), MIN_VALUE_NOT_SET_FOR_MEAN( aggregationParams = AGGREGATION_PARAMS.copy( - metrics = ImmutableList.of(MetricDefinition(MEAN), MetricDefinition(SUM)), - minTotalValue = 0.0, - maxTotalValue = 1.0, - minValue = null, - maxValue = null, + features = + ImmutableList.of( + createScalarFeatureSpec( + nonFeatureMetrics = ImmutableList.of(MetricDefinition(MEAN), MetricDefinition(SUM)), + minValue = null, + maxValue = null, + minTotalValue = 0.0, + maxTotalValue = 1.0, + ) + ) ), exceptionMessage = "(minValue, maxValue) must be set for MEAN metric.", ), VALUE_EXTRACTOR_NOT_SET_FOR_SUM_AND_MEAN( aggregationParams = AGGREGATION_PARAMS.copy( - metrics = - ImmutableList.of(MetricDefinition(COUNT), MetricDefinition(MEAN), MetricDefinition(SUM)) + nonFeatureMetrics = ImmutableList.of(MetricDefinition(COUNT)), + features = + ImmutableList.of( + createScalarFeatureSpec( + nonFeatureMetrics = ImmutableList.of(MetricDefinition(MEAN), MetricDefinition(SUM)) + ) + ), ), hasValueExtractor = false, exceptionMessage = "Metrics [MEAN, SUM] require a value extractor.", @@ -316,32 +409,49 @@ class DpFunctionsParamsTest { MIN_VALUE_NOT_SET_FOR_QUANTILES( aggregationParams = AGGREGATION_PARAMS.copy( - metrics = ImmutableList.of(MetricDefinition(QUANTILES(ranks = ImmutableList.of()))), - minTotalValue = 0.0, - maxTotalValue = 1.0, - minValue = null, - maxValue = null, + features = + ImmutableList.of( + createScalarFeatureSpec( + nonFeatureMetrics = + ImmutableList.of(MetricDefinition(QUANTILES(ranks = ImmutableList.of()))), + minValue = null, + maxValue = null, + minTotalValue = 0.0, + maxTotalValue = 1.0, + ) + ) ), exceptionMessage = "(minValue, maxValue) must be set for QUANTILES metric.", ), BUDGET_SPEC_SET_FOR_MEAN_AND_COUNT( aggregationParams = AGGREGATION_PARAMS.copy( - metrics = + nonFeatureMetrics = ImmutableList.of( - MetricDefinition(MEAN, RelativeBudgetPerOpSpec(weight = 1.0)), - MetricDefinition(COUNT, AbsoluteBudgetPerOpSpec(epsilon = 2.0, delta = 1e-12)), - ) + MetricDefinition(COUNT, AbsoluteBudgetPerOpSpec(epsilon = 2.0, delta = 1e-12)) + ), + features = + ImmutableList.of( + createScalarFeatureSpec( + nonFeatureMetrics = + ImmutableList.of(MetricDefinition(MEAN, RelativeBudgetPerOpSpec(weight = 1.0))) + ) + ), ), exceptionMessage = "BudgetPerOpSpec can not be set for both COUNT and MEAN metrics.", ), BUDGET_SPEC_SET_FOR_MEAN_AND_SUM( aggregationParams = AGGREGATION_PARAMS.copy( - metrics = + features = ImmutableList.of( - MetricDefinition(MEAN, RelativeBudgetPerOpSpec(weight = 1.0)), - MetricDefinition(SUM, RelativeBudgetPerOpSpec(weight = 2.0)), + createScalarFeatureSpec( + nonFeatureMetrics = + ImmutableList.of( + MetricDefinition(MEAN, RelativeBudgetPerOpSpec(weight = 1.0)), + MetricDefinition(SUM, RelativeBudgetPerOpSpec(weight = 2.0)), + ) + ) ) ), exceptionMessage = "BudgetPerOpSpec can not be set for both SUM and MEAN metrics.", @@ -349,13 +459,17 @@ class DpFunctionsParamsTest { MAX_CONTRIBUTIONS_PER_PARTITION_MAX_CONTRIBUTIONS_NOT_SET_FOR_VARIANCE( aggregationParams = AGGREGATION_PARAMS.copy( - metrics = ImmutableList.of(MetricDefinition(VARIANCE)), + nonFeatureMetrics = ImmutableList.of(), + features = + ImmutableList.of( + createScalarFeatureSpec( + nonFeatureMetrics = ImmutableList.of(MetricDefinition(VARIANCE)), + minTotalValue = -1.0, + maxTotalValue = 1.0, + ) + ), maxContributionsPerPartition = null, maxContributions = null, - minTotalValue = -1.0, - maxTotalValue = 1.0, - minValue = null, - maxValue = null, ), exceptionMessage = "maxContributionsPerPartition or maxContributions must be set for VARIANCE metric.", @@ -363,32 +477,31 @@ class DpFunctionsParamsTest { MIN_VALUE_NOT_SET_FOR_VARIANCE( aggregationParams = AGGREGATION_PARAMS.copy( - metrics = ImmutableList.of(MetricDefinition(VARIANCE)), - minTotalValue = 0.0, - maxTotalValue = 1.0, - minValue = null, - maxValue = null, - ), - exceptionMessage = "(minValue, maxValue) must be set for VARIANCE metric.", - ), - MAX_VALUE_NOT_SET_FOR_VARIANCE( - aggregationParams = - AGGREGATION_PARAMS.copy( - metrics = ImmutableList.of(MetricDefinition(VARIANCE)), - minTotalValue = -1.0, - maxTotalValue = 0.0, - minValue = null, - maxValue = null, + features = + ImmutableList.of( + createScalarFeatureSpec( + nonFeatureMetrics = ImmutableList.of(MetricDefinition(VARIANCE)), + minValue = null, + maxValue = null, + minTotalValue = 0.0, + maxTotalValue = 1.0, + ) + ) ), exceptionMessage = "(minValue, maxValue) must be set for VARIANCE metric.", ), BUDGET_SPEC_SET_FOR_VARIANCE_AND_MEAN( aggregationParams = AGGREGATION_PARAMS.copy( - metrics = + features = ImmutableList.of( - MetricDefinition(VARIANCE, RelativeBudgetPerOpSpec(weight = 1.0)), - MetricDefinition(MEAN, RelativeBudgetPerOpSpec(weight = 1.0)), + createScalarFeatureSpec( + nonFeatureMetrics = + ImmutableList.of( + MetricDefinition(VARIANCE, RelativeBudgetPerOpSpec(weight = 1.0)), + MetricDefinition(MEAN, RelativeBudgetPerOpSpec(weight = 1.0)), + ) + ) ) ), exceptionMessage = "BudgetPerOpSpec can not be set for both MEAN and VARIANCE metrics.", @@ -396,21 +509,34 @@ class DpFunctionsParamsTest { BUDGET_SPEC_SET_FOR_VARIANCE_AND_COUNT( aggregationParams = AGGREGATION_PARAMS.copy( - metrics = + nonFeatureMetrics = ImmutableList.of( - MetricDefinition(VARIANCE, RelativeBudgetPerOpSpec(weight = 1.0)), - MetricDefinition(COUNT, AbsoluteBudgetPerOpSpec(epsilon = 2.0, delta = 1e-12)), - ) + MetricDefinition(COUNT, AbsoluteBudgetPerOpSpec(epsilon = 2.0, delta = 1e-12)) + ), + features = + ImmutableList.of( + createScalarFeatureSpec( + nonFeatureMetrics = + ImmutableList.of( + MetricDefinition(VARIANCE, RelativeBudgetPerOpSpec(weight = 1.0)) + ) + ) + ), ), exceptionMessage = "BudgetPerOpSpec can not be set for both COUNT and VARIANCE metrics.", ), BUDGET_SPEC_SET_FOR_VARIANCE_AND_SUM( aggregationParams = AGGREGATION_PARAMS.copy( - metrics = + features = ImmutableList.of( - MetricDefinition(VARIANCE, RelativeBudgetPerOpSpec(weight = 1.0)), - MetricDefinition(SUM, RelativeBudgetPerOpSpec(weight = 2.0)), + createScalarFeatureSpec( + nonFeatureMetrics = + ImmutableList.of( + MetricDefinition(VARIANCE, RelativeBudgetPerOpSpec(weight = 1.0)), + MetricDefinition(SUM, RelativeBudgetPerOpSpec(weight = 2.0)), + ) + ) ) ), exceptionMessage = "BudgetPerOpSpec can not be set for both SUM and VARIANCE metrics.", @@ -426,7 +552,7 @@ class DpFunctionsParamsTest { DUPLICATE_METRIC_TYPES( aggregationParams = AGGREGATION_PARAMS.copy( - metrics = + nonFeatureMetrics = ImmutableList.of( MetricDefinition(COUNT), MetricDefinition(PRIVACY_ID_COUNT), @@ -434,78 +560,79 @@ class DpFunctionsParamsTest { ) ), exceptionMessage = - "metrics must not contain duplicate metric types. Provided " + + "nonFeatureMetrics must not contain duplicate metric types. Provided " + "[COUNT, PRIVACY_ID_COUNT, COUNT].", ), - NORM_KIND_NOT_SET_FOR_VECTOR_SUM( + DUPLICATE_METRIC_TYPES_IN_FEATURE( aggregationParams = AGGREGATION_PARAMS.copy( - metrics = ImmutableList.of(MetricDefinition(VECTOR_SUM)), - vectorNormKind = null, - vectorMaxTotalNorm = 2.3, - vectorSize = 2, + nonFeatureMetrics = ImmutableList.of(), + features = + ImmutableList.of( + createScalarFeatureSpec( + featureId = "feature1", + nonFeatureMetrics = ImmutableList.of(MetricDefinition(SUM), MetricDefinition(SUM)), + minTotalValue = 1.0, + maxTotalValue = 2.0, + ) + ), ), - exceptionMessage = "vectorNormKind must be set for VECTOR_SUM metric.", + exceptionMessage = + "feature feature1 must not contain duplicate metric types. Provided [SUM, SUM]", ), - L2_NORM_KIND_WHEN_LAPLACE_NOISE_IS_USED( + DUPLICATE_FEATURE_ID( aggregationParams = AGGREGATION_PARAMS.copy( - metrics = ImmutableList.of(MetricDefinition(VECTOR_SUM)), - noiseKind = NoiseKind.LAPLACE, - vectorNormKind = NormKind.L2, - vectorMaxTotalNorm = 2.3, - vectorSize = 2, + nonFeatureMetrics = ImmutableList.of(), + features = + ImmutableList.of( + createScalarFeatureSpec( + featureId = "feature1", + nonFeatureMetrics = ImmutableList.of(MetricDefinition(SUM)), + minTotalValue = 1.0, + maxTotalValue = 2.0, + ), + createScalarFeatureSpec( + featureId = "feature1", + nonFeatureMetrics = ImmutableList.of(MetricDefinition(MEAN)), + minValue = 1.0, + maxValue = 2.0, + ), + ), ), - exceptionMessage = "vectorNormKind must be L_INF or L1 for LAPLACE noise.", + exceptionMessage = "featureId must be unique. Provided [feature1, feature1]", ), - L1_NORM_KIND_WHEN_LAPLACE_NOISE_IS_USED( + L2_NORM_KIND_WHEN_LAPLACE_NOISE_IS_USED( aggregationParams = AGGREGATION_PARAMS.copy( - metrics = ImmutableList.of(MetricDefinition(VECTOR_SUM)), - noiseKind = NoiseKind.GAUSSIAN, - vectorNormKind = NormKind.L1, - vectorMaxTotalNorm = 2.3, - vectorSize = 2, + noiseKind = NoiseKind.LAPLACE, + features = + ImmutableList.of( + createVectorFeatureSpec( + nonFeatureMetrics = ImmutableList.of(MetricDefinition(VECTOR_SUM)), + normKind = NormKind.L2, + vectorMaxTotalNorm = 2.3, + vectorSize = 2, + ) + ), ), - exceptionMessage = "vectorNormKind must be L_INF or L2 for GAUSSIAN noise.", + exceptionMessage = "vectorNormKind must be L_INF or L1 for LAPLACE noise.", ), - MAX_TOTAL_NORM_NOT_SET_FOR_VECTOR_SUM( + L1_NORM_KIND_WHEN_GAUSSIAN_NOISE_IS_USED( aggregationParams = AGGREGATION_PARAMS.copy( - metrics = ImmutableList.of(MetricDefinition(VECTOR_SUM)), noiseKind = NoiseKind.GAUSSIAN, - vectorNormKind = NormKind.L2, - vectorMaxTotalNorm = null, - vectorSize = 2, - ), - exceptionMessage = "vectorMaxTotalNorm must be set for VECTOR_SUM metric.", - ), - VECTOR_SIZE_NOT_SET_FOR_VECTOR_SUM( - aggregationParams = - AGGREGATION_PARAMS.copy( - metrics = ImmutableList.of(MetricDefinition(VECTOR_SUM)), - vectorNormKind = NormKind.L_INF, - vectorMaxTotalNorm = 1.0, - vectorSize = null, - ), - exceptionMessage = "vectorSize must be set for VECTOR_SUM metric.", - ), - VECTOR_SUM_IS_REQUESTED_TOGETHER_WITH_SCALAR_METRICS( - aggregationParams = - AGGREGATION_PARAMS.copy( - metrics = + features = ImmutableList.of( - MetricDefinition(VECTOR_SUM), - MetricDefinition(SUM), - MetricDefinition(MEAN), - MetricDefinition(VARIANCE), + createVectorFeatureSpec( + nonFeatureMetrics = ImmutableList.of(MetricDefinition(VECTOR_SUM)), + normKind = NormKind.L1, + vectorMaxTotalNorm = 2.3, + vectorSize = 2, + ) ), - vectorNormKind = NormKind.L_INF, - vectorMaxTotalNorm = 1.0, - vectorSize = 3, ), - exceptionMessage = - "VECTOR_SUM can not be computed together with scalar metrics such as SUM, MEAN, VARIANCE and QUANTILES.", + exceptionMessage = "vectorNormKind must be L_INF or L2 for GAUSSIAN noise.", ), } @@ -593,13 +720,13 @@ class DpFunctionsParamsTest { companion object { val AGGREGATION_PARAMS = AggregationParams( - metrics = ImmutableList.of(MetricDefinition(COUNT), MetricDefinition(PRIVACY_ID_COUNT)), + nonFeatureMetrics = + ImmutableList.of(MetricDefinition(COUNT), MetricDefinition(PRIVACY_ID_COUNT)), noiseKind = NoiseKind.LAPLACE, maxPartitionsContributed = 1, maxContributionsPerPartition = 1, maxContributions = null, - minValue = 0.0, - maxValue = 1.0, + features = ImmutableList.of(createScalarFeatureSpec(minValue = 0.0, maxValue = 1.0)), ) val SELECT_PARTITIONS_PARAMS = @@ -608,5 +735,30 @@ class DpFunctionsParamsTest { budget = AbsoluteBudgetPerOpSpec(epsilon = 1.0, delta = 1e-12), preThreshold = 10, ) + + private fun createScalarFeatureSpec( + featureId: String = "value", + nonFeatureMetrics: ImmutableList = ImmutableList.of(), + minValue: Double? = 0.0, + maxValue: Double? = 1.0, + minTotalValue: Double? = null, + maxTotalValue: Double? = null, + ) = + ScalarFeatureSpec( + featureId, + nonFeatureMetrics, + minValue, + maxValue, + minTotalValue, + maxTotalValue, + ) + + private fun createVectorFeatureSpec( + featureId: String = "value", + nonFeatureMetrics: ImmutableList = ImmutableList.of(), + vectorSize: Int = 1, + normKind: NormKind = NormKind.L1, + vectorMaxTotalNorm: Double = 1.0, + ) = VectorFeatureSpec(featureId, nonFeatureMetrics, vectorSize, normKind, vectorMaxTotalNorm) } } diff --git a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/EndToEndTest.kt b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/EndToEndTest.kt index 62e67922..8b012263 100644 --- a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/EndToEndTest.kt +++ b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/EndToEndTest.kt @@ -75,13 +75,19 @@ class EndToEndTest { // Use low bounds to avoid sensitivity overflow when adding noise. val params = AggregationParams( - metrics = - ImmutableList.of(MetricDefinition(COUNT), MetricDefinition(SUM), MetricDefinition(MEAN)), + nonFeatureMetrics = ImmutableList.of(MetricDefinition(COUNT)), + features = + ImmutableList.of( + ScalarFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(SUM), MetricDefinition(MEAN)), + minValue = -2.0, + maxValue = 2.0, + ) + ), noiseKind = LAPLACE, maxPartitionsContributed = 1, maxContributionsPerPartition = 1, - minValue = -2.0, - maxValue = 2.0, ) val dpAggregates = @@ -90,8 +96,10 @@ class EndToEndTest { val partitionResult = dpAggregates.data.toMap()["US"]!! assertThat(partitionResult.count).isWithin(1e-1).of(100.0) - assertThat(partitionResult.sum).isWithin(1e-1).of(200.0) - assertThat(partitionResult.mean).isWithin(1e-10).of(partitionResult.sum / partitionResult.count) + assertThat(partitionResult.perFeatureList.first().sum).isWithin(1e-1).of(200.0) + assertThat(partitionResult.perFeatureList.first().mean) + .isWithin(1e-10) + .of(partitionResult.perFeatureList.first().sum / partitionResult.count) } @Test @@ -104,12 +112,20 @@ class EndToEndTest { // Use low bounds to avoid sensitivity overflow when adding noise. val params = AggregationParams( - metrics = ImmutableList.of(MetricDefinition(COUNT), MetricDefinition(PRIVACY_ID_COUNT)), + nonFeatureMetrics = + ImmutableList.of(MetricDefinition(COUNT), MetricDefinition(PRIVACY_ID_COUNT)), + features = + ImmutableList.of( + ScalarFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(), + minValue = -2.0, + maxValue = 2.0, + ) + ), noiseKind = LAPLACE, maxPartitionsContributed = 1, maxContributionsPerPartition = 1, - minValue = -2.0, - maxValue = 2.0, ) val dpAggregates = @@ -146,7 +162,7 @@ class EndToEndTest { ) val params = AggregationParams( - metrics = ImmutableList.of(MetricDefinition(COUNT)), + nonFeatureMetrics = ImmutableList.of(MetricDefinition(COUNT)), noiseKind = LAPLACE, // Contribution bounding would be applied if it was not disabled. maxPartitionsContributed = 1, @@ -190,7 +206,7 @@ class EndToEndTest { ) val params = AggregationParams( - metrics = ImmutableList.of(MetricDefinition(COUNT)), + nonFeatureMetrics = ImmutableList.of(MetricDefinition(COUNT)), noiseKind = LAPLACE, maxPartitionsContributed = 2, // Contributions to each of the two partitions are kept. maxContributionsPerPartition = 1, // Double contributions per partition are removed. @@ -218,13 +234,19 @@ class EndToEndTest { // Use low bounds to avoid sensitivity overflow when adding noise. val params = AggregationParams( - metrics = - ImmutableList.of(MetricDefinition(COUNT), MetricDefinition(SUM), MetricDefinition(MEAN)), + nonFeatureMetrics = ImmutableList.of(MetricDefinition(COUNT)), + features = + ImmutableList.of( + ScalarFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(SUM), MetricDefinition(MEAN)), + minValue = -2.0, + maxValue = 2.0, + ) + ), noiseKind = LAPLACE, maxPartitionsContributed = 1, maxContributionsPerPartition = 1, - minValue = -2.0, - maxValue = 2.0, ) val dpAggregates = @@ -234,8 +256,10 @@ class EndToEndTest { val partitionResult = dpAggregates.data.toMap()["US"]!! assertThat(partitionResult.count).isWithin(1e-1).of(2.0) - assertThat(partitionResult.sum).isWithin(1e-1).of(3.0) - assertThat(partitionResult.mean).isWithin(1e-10).of(partitionResult.sum / partitionResult.count) + assertThat(partitionResult.perFeatureList.first().sum).isWithin(1e-1).of(3.0) + assertThat(partitionResult.perFeatureList.first().mean) + .isWithin(1e-10) + .of(partitionResult.perFeatureList.first().sum / partitionResult.count) } @Test @@ -247,17 +271,23 @@ class EndToEndTest { // Use low bounds to avoid sensitivity overflow when adding noise. val params = AggregationParams( - metrics = + nonFeatureMetrics = ImmutableList.of( MetricDefinition(COUNT, AbsoluteBudgetPerOpSpec(0.1, 1e-5)), - MetricDefinition(SUM, AbsoluteBudgetPerOpSpec(0.1, 1e-5)), MetricDefinition(PRIVACY_ID_COUNT, AbsoluteBudgetPerOpSpec(0.1, 1e-5)), ), + features = + ImmutableList.of( + ScalarFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(SUM, AbsoluteBudgetPerOpSpec(0.1, 1e-5))), + minTotalValue = -5.0, + maxTotalValue = 5.0, + ) + ), noiseKind = GAUSSIAN, maxPartitionsContributed = 5, maxContributionsPerPartition = 5, - minTotalValue = -5.0, - maxTotalValue = 5.0, ) val dpAggregates = @@ -270,8 +300,8 @@ class EndToEndTest { assertThat(dpAggregates.data.toMap()["US"]!!.count) .isNotEqualTo(dpAggregatesAnotherRun.data.toMap()["US"]!!.count) - assertThat(dpAggregates.data.toMap()["US"]!!.sum) - .isNotEqualTo(dpAggregatesAnotherRun.data.toMap()["US"]!!.sum) + assertThat(dpAggregates.data.toMap()["US"]!!.perFeatureList.first().sum) + .isNotEqualTo(dpAggregatesAnotherRun.data.toMap()["US"]!!.perFeatureList.first().sum) assertThat(dpAggregates.data.toMap()["US"]!!.privacyIdCount) .isNotEqualTo(dpAggregatesAnotherRun.data.toMap()["US"]!!.privacyIdCount) } diff --git a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/ExactPrivacyIdCountCombinerTest.kt b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/ExactPrivacyIdCountCombinerTest.kt index b3ca1451..73e8519a 100644 --- a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/ExactPrivacyIdCountCombinerTest.kt +++ b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/ExactPrivacyIdCountCombinerTest.kt @@ -18,6 +18,7 @@ package com.google.privacy.differentialprivacy.pipelinedp4j.core import com.google.common.truth.Truth.assertThat import com.google.common.truth.extensions.proto.ProtoTruth.assertThat +import com.google.privacy.differentialprivacy.pipelinedp4j.proto.PrivacyIdContributionsKt.featureContribution import com.google.privacy.differentialprivacy.pipelinedp4j.proto.privacyIdContributions import com.google.privacy.differentialprivacy.pipelinedp4j.proto.privacyIdCountAccumulator import com.google.testing.junit.testparameterinjector.TestParameterInjector @@ -34,7 +35,9 @@ class ExactPrivacyIdCountCombinerTest { val accumulator = combiner.createAccumulator( - privacyIdContributions { singleValueContributions += listOf(1.0, 1.0, 1.0) } + privacyIdContributions { + features += featureContribution { singleValueContributions += listOf(1.0, 1.0, 1.0) } + } ) assertThat(accumulator).isEqualTo(privacyIdCountAccumulator { count = 1 }) diff --git a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/MeanCombinerTest.kt b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/MeanCombinerTest.kt index 1e566584..4a47e660 100644 --- a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/MeanCombinerTest.kt +++ b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/MeanCombinerTest.kt @@ -27,6 +27,7 @@ import com.google.privacy.differentialprivacy.pipelinedp4j.core.MetricType.SUM import com.google.privacy.differentialprivacy.pipelinedp4j.core.budget.AllocatedBudget import com.google.privacy.differentialprivacy.pipelinedp4j.dplibrary.NoiseFactory import com.google.privacy.differentialprivacy.pipelinedp4j.dplibrary.ZeroNoiseFactory +import com.google.privacy.differentialprivacy.pipelinedp4j.proto.PrivacyIdContributionsKt.featureContribution import com.google.privacy.differentialprivacy.pipelinedp4j.proto.meanAccumulator import com.google.privacy.differentialprivacy.pipelinedp4j.proto.privacyIdContributions import com.google.testing.junit.testparameterinjector.TestParameter @@ -42,12 +43,14 @@ class MeanCombinerTest { companion object { private val AGG_PARAMS = AggregationParams( - metrics = ImmutableList.of(MetricDefinition(MEAN)), + nonFeatureMetrics = ImmutableList.of(), + features = + ImmutableList.of( + ScalarFeatureSpec("value", ImmutableList.of(MetricDefinition(MEAN)), -10.0, 10.0) + ), noiseKind = NoiseKind.GAUSSIAN, maxPartitionsContributed = 3, maxContributionsPerPartition = 5, - minValue = -10.0, - maxValue = 10.0, ) private val noiseMock: Noise = mock() @@ -70,6 +73,7 @@ class MeanCombinerTest { UNUSED_ALLOCATED_BUDGET, NoiseFactory(), ExecutionMode.PRODUCTION, + AGG_PARAMS.features[0] as ScalarFeatureSpec, ) val accumulator = combiner.emptyAccumulator() @@ -77,6 +81,7 @@ class MeanCombinerTest { assertThat(accumulator) .isEqualTo( meanAccumulator { + featureId = "value" count = 0 normalizedSum = 0.0 } @@ -85,21 +90,33 @@ class MeanCombinerTest { @Test fun createAccumulator_doesNotClampContributionsWithinBounds() { + val featureSpec = + (AGG_PARAMS.features[0] as ScalarFeatureSpec).copy(minValue = -10.0, maxValue = 10.0) + val params = AGG_PARAMS.copy(features = ImmutableList.of(featureSpec)) val combiner = MeanCombiner( - AGG_PARAMS.copy(minValue = -10.0, maxValue = 10.0), + params, UNUSED_ALLOCATED_BUDGET, UNUSED_ALLOCATED_BUDGET, NoiseFactory(), ExecutionMode.PRODUCTION, + featureSpec, ) val accumulator = - combiner.createAccumulator(privacyIdContributions { singleValueContributions += listOf(5.5) }) + combiner.createAccumulator( + privacyIdContributions { + features += featureContribution { + featureId = "value" + singleValueContributions += listOf(5.5) + } + } + ) assertThat(accumulator) .isEqualTo( meanAccumulator { + featureId = "value" count = 1 normalizedSum = 5.5 } @@ -108,23 +125,33 @@ class MeanCombinerTest { @Test fun createAccumulator_privacyLevelWithContributionBounding_clampsValues() { + val featureSpec = + (AGG_PARAMS.features[0] as ScalarFeatureSpec).copy(minValue = -10.0, maxValue = 10.0) + val params = AGG_PARAMS.copy(features = ImmutableList.of(featureSpec)) val combiner = MeanCombiner( - AGG_PARAMS.copy(minValue = -10.0, maxValue = 10.0), + params, UNUSED_ALLOCATED_BUDGET, UNUSED_ALLOCATED_BUDGET, NoiseFactory(), ExecutionMode.PRODUCTION, + featureSpec, ) val accumulator = combiner.createAccumulator( - privacyIdContributions { singleValueContributions += listOf(-20.0, 30.0) } + privacyIdContributions { + features += featureContribution { + featureId = "value" + singleValueContributions += listOf(-20.0, 30.0) + } + } ) assertThat(accumulator) .isEqualTo( meanAccumulator { + featureId = "value" count = 2 normalizedSum = 0.0 // = sum of clamped values = -10 + 10 } @@ -133,23 +160,33 @@ class MeanCombinerTest { @Test fun createAccumulator_fullTestMode_doesNotClampValues() { + val featureSpec = + (AGG_PARAMS.features[0] as ScalarFeatureSpec).copy(minValue = -10.0, maxValue = 10.0) + val params = AGG_PARAMS.copy(features = ImmutableList.of(featureSpec)) val combiner = MeanCombiner( - AGG_PARAMS.copy(minValue = -10.0, maxValue = 10.0), + params, UNUSED_ALLOCATED_BUDGET, UNUSED_ALLOCATED_BUDGET, NoiseFactory(), FULL_TEST_MODE, + featureSpec, ) val accumulator = combiner.createAccumulator( - privacyIdContributions { singleValueContributions += listOf(-20.0, 30.0) } + privacyIdContributions { + features += featureContribution { + featureId = "value" + singleValueContributions += listOf(-20.0, 30.0) + } + } ) assertThat(accumulator) .isEqualTo( meanAccumulator { + featureId = "value" count = 2 normalizedSum = 10.0 // = sum of non-clamped values = -20 + 30 } @@ -158,21 +195,35 @@ class MeanCombinerTest { @Test fun createAccumulator_normalizesSum() { + val params = + AGG_PARAMS.copy( + features = + ImmutableList.of((AGG_PARAMS.features[0] as ScalarFeatureSpec).copy(minValue = 5.0)) + ) val combiner = MeanCombiner( - AGG_PARAMS.copy(minValue = 5.0), + params, UNUSED_ALLOCATED_BUDGET, UNUSED_ALLOCATED_BUDGET, NoiseFactory(), ExecutionMode.PRODUCTION, + params.features[0] as ScalarFeatureSpec, ) val accumulator = - combiner.createAccumulator(privacyIdContributions { singleValueContributions += listOf(6.0) }) + combiner.createAccumulator( + privacyIdContributions { + features += featureContribution { + featureId = "value" + singleValueContributions += listOf(6.0) + } + } + ) assertThat(accumulator) .isEqualTo( meanAccumulator { + featureId = "value" count = 1 normalizedSum = -1.5 } @@ -181,23 +232,37 @@ class MeanCombinerTest { @Test fun createAccumulator_normalizationAndClamping() { + val params = + AGG_PARAMS.copy( + features = + ImmutableList.of( + (AGG_PARAMS.features[0] as ScalarFeatureSpec).copy(minValue = 5.0, maxValue = 10.0) + ) + ) val combiner = MeanCombiner( - AGG_PARAMS.copy(minValue = 5.0, maxValue = 10.0), + params, UNUSED_ALLOCATED_BUDGET, UNUSED_ALLOCATED_BUDGET, NoiseFactory(), ExecutionMode.PRODUCTION, + params.features[0] as ScalarFeatureSpec, ) val accumulator = combiner.createAccumulator( - privacyIdContributions { singleValueContributions += listOf(30.0) } + privacyIdContributions { + features += featureContribution { + featureId = "value" + singleValueContributions += listOf(30.0) + } + } ) assertThat(accumulator) .isEqualTo( meanAccumulator { + featureId = "value" count = 1 normalizedSum = 2.5 } @@ -206,23 +271,35 @@ class MeanCombinerTest { @Test fun createAccumulator_aggregatesMultipleElements() { + val params = + AGG_PARAMS.copy( + features = + ImmutableList.of((AGG_PARAMS.features[0] as ScalarFeatureSpec).copy(minValue = 4.0)) + ) val combiner = MeanCombiner( - AGG_PARAMS.copy(minValue = 4.0), + params, UNUSED_ALLOCATED_BUDGET, UNUSED_ALLOCATED_BUDGET, NoiseFactory(), ExecutionMode.PRODUCTION, + params.features[0] as ScalarFeatureSpec, ) val accumulator = combiner.createAccumulator( - privacyIdContributions { singleValueContributions += listOf(3.0, 5.5, 6.0) } + privacyIdContributions { + features += featureContribution { + featureId = "value" + singleValueContributions += listOf(3.0, 5.5, 6.0) + } + } ) assertThat(accumulator) .isEqualTo( meanAccumulator { + featureId = "value" count = 3 normalizedSum = -5.5 // = sum of normalized values = -3 - 1.5 - 1 } @@ -238,15 +315,18 @@ class MeanCombinerTest { UNUSED_ALLOCATED_BUDGET, NoiseFactory(), ExecutionMode.PRODUCTION, + AGG_PARAMS.features[0] as ScalarFeatureSpec, ) val accumulator = combiner.mergeAccumulators( meanAccumulator { + featureId = "value" count = 1 normalizedSum = -5.0 }, meanAccumulator { + featureId = "value" count = 10 normalizedSum = 8.5 }, @@ -255,6 +335,7 @@ class MeanCombinerTest { assertThat(accumulator) .isEqualTo( meanAccumulator { + featureId = "value" count = 11 normalizedSum = 3.5 } @@ -267,19 +348,27 @@ class MeanCombinerTest { countBudget.initialize(2.0, 1e-5) val sumBudget = AllocatedBudget() sumBudget.initialize(1.0, 1e-3) + val params = + AGG_PARAMS.copy( + features = + ImmutableList.of( + (AGG_PARAMS.features[0] as ScalarFeatureSpec).copy( + metrics = ImmutableList.of(MetricDefinition(MEAN)), + minValue = 4.0, + maxValue = 10.0, + ) + ), + maxPartitionsContributed = 5, + maxContributionsPerPartition = 7, + ) val combiner = MeanCombiner( - AGG_PARAMS.copy( - metrics = ImmutableList.of(MetricDefinition(MEAN)), - maxPartitionsContributed = 5, - maxContributionsPerPartition = 7, - minValue = 4.0, - maxValue = 10.0, - ), + params, countBudget, sumBudget, noiseFactoryMock, ExecutionMode.PRODUCTION, + params.features[0] as ScalarFeatureSpec, ) val accumulator = meanAccumulator { count = 10 @@ -316,25 +405,29 @@ class MeanCombinerTest { val sumBudget = AllocatedBudget() sumBudget.initialize(10000.0, 0.0) + val params = + AGG_PARAMS.copy( + nonFeatureMetrics = ImmutableList.of(MetricDefinition(COUNT)), + features = + ImmutableList.of( + (AGG_PARAMS.features[0] as ScalarFeatureSpec).copy( + metrics = ImmutableList.of(MetricDefinition(MEAN), MetricDefinition(SUM)), + minValue = 4.0, + maxValue = 12.0, + ) + ), + maxPartitionsContributed = 5, + maxContributionsPerPartition = 7, + noiseKind = NoiseKind.LAPLACE, + ) val combiner = MeanCombiner( - AGG_PARAMS.copy( - metrics = - ImmutableList.of( - MetricDefinition(MEAN), - MetricDefinition(SUM), - MetricDefinition(COUNT), - ), - maxPartitionsContributed = 5, - maxContributionsPerPartition = 7, - minValue = 4.0, - maxValue = 12.0, - noiseKind = NoiseKind.LAPLACE, - ), + params, countBudget, sumBudget, NoiseFactory(), ExecutionMode.PRODUCTION, + params.features[0] as ScalarFeatureSpec, ) val accumulator = meanAccumulator { @@ -353,31 +446,15 @@ class MeanCombinerTest { assertThat(result.mean).isWithin(1e-9).of(result.sum!! / result.count!!) } - enum class ReturnedMetricsTestCase( - val requestedMetrics: ImmutableList, - val countExpected: Boolean, - val sumExpected: Boolean, - ) { - NO_SUM_NO_COUNT( - requestedMetrics = ImmutableList.of(MetricDefinition(MEAN)), - countExpected = false, - sumExpected = false, - ), - ONLY_SUM( - requestedMetrics = ImmutableList.of(MetricDefinition(MEAN), MetricDefinition(SUM)), - countExpected = false, - sumExpected = true, - ), + enum class ReturnedMetricsTestCase(val requestedMetrics: ImmutableList) { + NO_SUM_NO_COUNT(requestedMetrics = ImmutableList.of(MetricDefinition(MEAN))), + ONLY_SUM(requestedMetrics = ImmutableList.of(MetricDefinition(MEAN), MetricDefinition(SUM))), ONLY_COUNT( - requestedMetrics = ImmutableList.of(MetricDefinition(MEAN), MetricDefinition(COUNT)), - countExpected = true, - sumExpected = false, + requestedMetrics = ImmutableList.of(MetricDefinition(MEAN), MetricDefinition(COUNT)) ), COUNT_AND_SUM( requestedMetrics = - ImmutableList.of(MetricDefinition(MEAN), MetricDefinition(SUM), MetricDefinition(COUNT)), - countExpected = true, - sumExpected = true, + ImmutableList.of(MetricDefinition(MEAN), MetricDefinition(SUM), MetricDefinition(COUNT)) ), } @@ -385,13 +462,26 @@ class MeanCombinerTest { fun aggregate_computeMetrics_checkWhichMetricReturned( @TestParameter testCase: ReturnedMetricsTestCase ) { + val featureMetrics = testCase.requestedMetrics.filter { it.type == MEAN || it.type == SUM } + val nonFeatureMetrics = testCase.requestedMetrics.filter { it.type == COUNT } + val params = + AGG_PARAMS.copy( + nonFeatureMetrics = ImmutableList.copyOf(nonFeatureMetrics), + features = + ImmutableList.of( + (AGG_PARAMS.features[0] as ScalarFeatureSpec).copy( + metrics = ImmutableList.copyOf(featureMetrics) + ) + ), + ) val combiner = MeanCombiner( - AGG_PARAMS.copy(metrics = testCase.requestedMetrics), + params, UNUSED_ALLOCATED_BUDGET, UNUSED_ALLOCATED_BUDGET, NoiseFactory(), ExecutionMode.PRODUCTION, + params.features[0] as ScalarFeatureSpec, ) val metrics = @@ -401,13 +491,13 @@ class MeanCombinerTest { normalizedSum = 120.0 } ) - if (testCase.countExpected) { + if (testCase.requestedMetrics.any { it.type == COUNT }) { assertThat(metrics.count).isNotNull() } else { assertThat(metrics.count).isNull() } - if (testCase.sumExpected) { + if (testCase.requestedMetrics.any { it.type == SUM }) { assertThat(metrics.sum).isNotNull() } else { assertThat(metrics.sum).isNull() @@ -416,22 +506,38 @@ class MeanCombinerTest { @Test fun computeMetrics_withoutNoise_withMultipleContributionsIncludingEmptyAccumulator_returnsCorrectResult() { + val featureSpec = + (AGG_PARAMS.features[0] as ScalarFeatureSpec).copy(minValue = -10.0, maxValue = 10.0) + val params = AGG_PARAMS.copy(features = ImmutableList.of(featureSpec)) val combiner = MeanCombiner( - AGG_PARAMS.copy(minValue = -10.0, maxValue = 10.0), + params, UNUSED_ALLOCATED_BUDGET, UNUSED_ALLOCATED_BUDGET, ZeroNoiseFactory(), ExecutionMode.PRODUCTION, + featureSpec, ) val accumulator0 = combiner.emptyAccumulator() val accumulator1 = combiner.createAccumulator( - privacyIdContributions { singleValueContributions += listOf(10.0, -10.0) } + privacyIdContributions { + features += featureContribution { + featureId = "value" + singleValueContributions += listOf(10.0, -10.0) + } + } ) val accumulator2 = - combiner.createAccumulator(privacyIdContributions { singleValueContributions += listOf(9.0) }) + combiner.createAccumulator( + privacyIdContributions { + features += featureContribution { + featureId = "value" + singleValueContributions += listOf(9.0) + } + } + ) val accumulator3 = combiner.mergeAccumulators(accumulator0, accumulator1) val finalAccumulator = combiner.mergeAccumulators(accumulator2, accumulator3) val result = combiner.computeMetrics(finalAccumulator) @@ -441,17 +547,26 @@ class MeanCombinerTest { @Test fun computeMetrics_withoutNoise_onlyEmptyAccumulator_returnsZeroCountAndNaNForSumAndMean() { + val params = + AGG_PARAMS.copy( + nonFeatureMetrics = ImmutableList.of(MetricDefinition(COUNT)), + features = + ImmutableList.of( + (AGG_PARAMS.features[0] as ScalarFeatureSpec).copy( + metrics = ImmutableList.of(MetricDefinition(MEAN), MetricDefinition(SUM)), + minValue = 4.0, + maxValue = 10.0, + ) + ), + ) val combiner = MeanCombiner( - AGG_PARAMS.copy( - ImmutableList.of(MetricDefinition(MEAN), MetricDefinition(SUM), MetricDefinition(COUNT)), - minValue = 4.0, - maxValue = 10.0, - ), + params, UNUSED_ALLOCATED_BUDGET, UNUSED_ALLOCATED_BUDGET, ZeroNoiseFactory(), ExecutionMode.PRODUCTION, + params.features[0] as ScalarFeatureSpec, ) val result = combiner.computeMetrics(combiner.emptyAccumulator()) diff --git a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/NoPrivacySamplerTest.kt b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/NoPrivacySamplerTest.kt index bb636805..906c126b 100644 --- a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/NoPrivacySamplerTest.kt +++ b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/NoPrivacySamplerTest.kt @@ -21,6 +21,7 @@ import com.google.privacy.differentialprivacy.pipelinedp4j.local.LocalCollection import com.google.privacy.differentialprivacy.pipelinedp4j.local.LocalEncoderFactory import com.google.privacy.differentialprivacy.pipelinedp4j.local.LocalTable import com.google.privacy.differentialprivacy.pipelinedp4j.proto.PrivacyIdContributions +import com.google.privacy.differentialprivacy.pipelinedp4j.proto.PrivacyIdContributionsKt.featureContribution import com.google.privacy.differentialprivacy.pipelinedp4j.proto.privacyIdContributions import org.junit.Test import org.junit.runner.RunWith @@ -61,7 +62,12 @@ class NoPrivacySamplerTest { NoPrivacySampler(LOCAL_EF.strings(), LOCAL_EF.strings(), LOCAL_EF) .sampleContributions(inputData) as LocalTable val returnedContributionsPk1 = - sampledData.data.toMap().getValue("pk1").singleValueContributionsList + sampledData.data + .toMap() + .getValue("pk1") + .featuresList + .find { it.featureId == "" }!! + .singleValueContributionsList // Returned contributions are of the same size as the originals. assertThat(returnedContributionsPk1).hasSize(4) @@ -128,12 +134,27 @@ class NoPrivacySamplerTest { mapOf( "pk1" to setOf( - privacyIdContributions { singleValueContributions += listOf(1.0, 2.0, 3.0, 4.0) } + privacyIdContributions { + features += featureContribution { + featureId = "" + singleValueContributions += listOf(1.0, 2.0, 3.0, 4.0) + } + } ), "pk2" to setOf( - privacyIdContributions { singleValueContributions += listOf(5.0, 6.0) }, - privacyIdContributions { singleValueContributions += listOf(7.0, 8.0) }, + privacyIdContributions { + features += featureContribution { + featureId = "" + singleValueContributions += listOf(5.0, 6.0) + } + }, + privacyIdContributions { + features += featureContribution { + featureId = "" + singleValueContributions += listOf(7.0, 8.0) + } + }, ), ) ) diff --git a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/PartitionAndPerPartitionSamplerTest.kt b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/PartitionAndPerPartitionSamplerTest.kt index 0224a928..30d7da02 100644 --- a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/PartitionAndPerPartitionSamplerTest.kt +++ b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/PartitionAndPerPartitionSamplerTest.kt @@ -24,6 +24,7 @@ import com.google.privacy.differentialprivacy.pipelinedp4j.local.LocalCollection import com.google.privacy.differentialprivacy.pipelinedp4j.local.LocalEncoderFactory import com.google.privacy.differentialprivacy.pipelinedp4j.local.LocalTable import com.google.privacy.differentialprivacy.pipelinedp4j.proto.PrivacyIdContributions +import com.google.privacy.differentialprivacy.pipelinedp4j.proto.PrivacyIdContributionsKt.featureContribution import com.google.privacy.differentialprivacy.pipelinedp4j.proto.PrivacyIdContributionsKt.multiValueContribution import com.google.privacy.differentialprivacy.pipelinedp4j.proto.privacyIdContributions import kotlin.Int.Companion.MAX_VALUE @@ -35,12 +36,14 @@ import org.junit.runners.JUnit4 class PartitionAndPerPartitionSamplerTest { val AGG_PARAMS = AggregationParams( - metrics = ImmutableList.of(MetricDefinition(MEAN)), + nonFeatureMetrics = ImmutableList.of(), + features = + ImmutableList.of( + ScalarFeatureSpec("value", ImmutableList.of(MetricDefinition(MEAN)), -1.0, 1.0) + ), noiseKind = GAUSSIAN, maxPartitionsContributed = MAX_VALUE, maxContributionsPerPartition = MAX_VALUE, - minValue = -1.0, - maxValue = 1.0, ) @Test @@ -126,8 +129,9 @@ class PartitionAndPerPartitionSamplerTest { .sampleContributions(inputData) as LocalTable val returnedContributions = sampledData.data.toMap().get("samePk")!! - assertThat(returnedContributions.multiValueContributionsList.count()).isEqualTo(0) - val singleValueContributions = returnedContributions.singleValueContributionsList + val featureContribution = returnedContributions.featuresList.find { it.featureId == "" }!! + assertThat(featureContribution.multiValueContributionsList.count()).isEqualTo(0) + val singleValueContributions = featureContribution.singleValueContributionsList assertThat(singleValueContributions.count()).isEqualTo(3) // Returned values are all in the list of the contributed values. assertThat(listOf(1.0, 2.0, 3.0, 4.0)).containsAtLeastElementsIn(singleValueContributions) @@ -172,8 +176,9 @@ class PartitionAndPerPartitionSamplerTest { .sampleContributions(inputData) as LocalTable val returnedContributions = sampledData.data.toMap().get("samePk")!! - assertThat(returnedContributions.singleValueContributionsList.count()).isEqualTo(0) - val multiValueContributions = returnedContributions.multiValueContributionsList + val featureContribution = returnedContributions.featuresList.find { it.featureId == "" }!! + assertThat(featureContribution.singleValueContributionsList.count()).isEqualTo(0) + val multiValueContributions = featureContribution.multiValueContributionsList assertThat(multiValueContributions.count()).isEqualTo(3) // Returned values are all in the list of the contributed values. assertThat( @@ -232,9 +237,33 @@ class PartitionAndPerPartitionSamplerTest { assertThat(sampledData.data.toList()) .containsExactly( - Pair("pk", privacyIdContributions { singleValueContributions += listOf(1.0, 2.0) }), - Pair("anotherPk", privacyIdContributions { singleValueContributions += listOf(3.0, 4.0) }), - Pair("pk", privacyIdContributions { singleValueContributions += 5.0 }), + Pair( + "pk", + privacyIdContributions { + features += featureContribution { + featureId = "" + singleValueContributions += listOf(1.0, 2.0) + } + }, + ), + Pair( + "anotherPk", + privacyIdContributions { + features += featureContribution { + featureId = "" + singleValueContributions += listOf(3.0, 4.0) + } + }, + ), + Pair( + "pk", + privacyIdContributions { + features += featureContribution { + featureId = "" + singleValueContributions += 5.0 + } + }, + ), ) } @@ -294,7 +323,13 @@ class PartitionAndPerPartitionSamplerTest { ) .sampleContributions(inputData) as LocalTable - val returnedContributions = sampledData.data.toMap().get("pk")!!.singleValueContributionsList + val returnedContributions = + sampledData.data + .toMap() + .get("pk")!! + .featuresList + .find { it.featureId == "" }!! + .singleValueContributionsList assertThat(returnedContributions.count()).isEqualTo(300) } diff --git a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/PartitionSamplerTest.kt b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/PartitionSamplerTest.kt index 18086234..8bf90831 100644 --- a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/PartitionSamplerTest.kt +++ b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/PartitionSamplerTest.kt @@ -21,6 +21,7 @@ import com.google.privacy.differentialprivacy.pipelinedp4j.local.LocalCollection import com.google.privacy.differentialprivacy.pipelinedp4j.local.LocalEncoderFactory import com.google.privacy.differentialprivacy.pipelinedp4j.local.LocalTable import com.google.privacy.differentialprivacy.pipelinedp4j.proto.PrivacyIdContributions +import com.google.privacy.differentialprivacy.pipelinedp4j.proto.PrivacyIdContributionsKt.featureContribution import com.google.privacy.differentialprivacy.pipelinedp4j.proto.PrivacyIdContributionsKt.multiValueContribution import com.google.privacy.differentialprivacy.pipelinedp4j.proto.privacyIdContributions import org.junit.Test @@ -104,7 +105,16 @@ class PartitionSamplerTest { ) .sampleContributions(inputData) as LocalTable - assertThat(sampledData.data.toMap().get("pk")!!.singleValueContributionsList.size).isEqualTo(3) + assertThat( + sampledData.data + .toMap() + .get("pk")!! + .featuresList + .find { it.featureId == "" }!! + .singleValueContributionsList + .size + ) + .isEqualTo(3) } @Test @@ -141,8 +151,24 @@ class PartitionSamplerTest { assertThat(sampledData.data.toList()) .containsExactly( - Pair("pk", privacyIdContributions { singleValueContributions += listOf(1.0, 1.0) }), - Pair("pk", privacyIdContributions { singleValueContributions += 2.0 }), + Pair( + "pk", + privacyIdContributions { + features += featureContribution { + featureId = "" + singleValueContributions += listOf(1.0, 1.0) + } + }, + ), + Pair( + "pk", + privacyIdContributions { + features += featureContribution { + featureId = "" + singleValueContributions += 2.0 + } + }, + ), ) } @@ -183,17 +209,24 @@ class PartitionSamplerTest { Pair( "pk", privacyIdContributions { - multiValueContributions += - listOf( - multiValueContribution { values += listOf(1.0, 2.0) }, - multiValueContribution { values += listOf(3.0, 4.0) }, - ) + features += featureContribution { + featureId = "" + multiValueContributions += + listOf( + multiValueContribution { values += listOf(1.0, 2.0) }, + multiValueContribution { values += listOf(3.0, 4.0) }, + ) + } }, ), Pair( "pk", privacyIdContributions { - multiValueContributions += listOf(multiValueContribution { values += listOf(5.0, 6.0) }) + features += featureContribution { + featureId = "" + multiValueContributions += + listOf(multiValueContribution { values += listOf(5.0, 6.0) }) + } }, ), ) diff --git a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/PartitionSamplerWithoutValuesTest.kt b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/PartitionSamplerWithoutValuesTest.kt index 3287a447..963b1c88 100644 --- a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/PartitionSamplerWithoutValuesTest.kt +++ b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/PartitionSamplerWithoutValuesTest.kt @@ -104,7 +104,7 @@ class PartitionSamplerWithoutValuesTest { .sampleContributions(inputData) as LocalTable // Check that all values are dropped - assertThat(sampledData.data.toMap().get("pk")!!.singleValueContributionsList.size).isEqualTo(0) + assertThat(sampledData.data.toMap()["pk"]!!.featuresList).isEmpty() } @Test diff --git a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/PerPartitionContributionsSamplerTest.kt b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/PerPartitionContributionsSamplerTest.kt index cc45e02e..82c9f9ac 100644 --- a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/PerPartitionContributionsSamplerTest.kt +++ b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/PerPartitionContributionsSamplerTest.kt @@ -24,6 +24,7 @@ import com.google.privacy.differentialprivacy.pipelinedp4j.local.LocalCollection import com.google.privacy.differentialprivacy.pipelinedp4j.local.LocalEncoderFactory import com.google.privacy.differentialprivacy.pipelinedp4j.local.LocalTable import com.google.privacy.differentialprivacy.pipelinedp4j.proto.PrivacyIdContributions +import com.google.privacy.differentialprivacy.pipelinedp4j.proto.PrivacyIdContributionsKt.featureContribution import com.google.privacy.differentialprivacy.pipelinedp4j.proto.PrivacyIdContributionsKt.multiValueContribution import com.google.privacy.differentialprivacy.pipelinedp4j.proto.privacyIdContributions import org.junit.Test @@ -69,9 +70,10 @@ class PerPartitionContributionsSamplerTest { ) .sampleContributions(inputData) as LocalTable val returnedContributions = sampledData.data.toMap().get("pk1")!! - assertThat(returnedContributions.multiValueContributionsList.count()).isEqualTo(0) - val singleValueContributions = returnedContributions.singleValueContributionsList - assertThat(singleValueContributions.count()).isEqualTo(2) + val featureContribution = returnedContributions.featuresList.find { it.featureId == "" }!! + assertThat(featureContribution.multiValueContributionsList).isEmpty() + val singleValueContributions = featureContribution.singleValueContributionsList + assertThat(singleValueContributions.size).isEqualTo(2) // Returned values are all in the list of the contributed values. assertThat(listOf(1.0, 2.0, 3.0, 4.0)).containsAtLeastElementsIn(singleValueContributions) } @@ -113,9 +115,10 @@ class PerPartitionContributionsSamplerTest { ) .sampleContributions(inputData) as LocalTable val returnedContributions = sampledData.data.toMap().get("pk1")!! - assertThat(returnedContributions.singleValueContributionsList.count()).isEqualTo(0) - val multiValueContributions = returnedContributions.multiValueContributionsList - assertThat(multiValueContributions.count()).isEqualTo(2) + val featureContribution = returnedContributions.featuresList.find { it.featureId == "" }!! + assertThat(featureContribution.singleValueContributionsList).isEmpty() + val multiValueContributions = featureContribution.multiValueContributionsList + assertThat(multiValueContributions.size).isEqualTo(2) // Returned values are all in the list of the contributed values. assertThat( listOf( @@ -142,9 +145,33 @@ class PerPartitionContributionsSamplerTest { assertThat(sampledData.data.toList()) .containsExactly( - Pair("pk1", privacyIdContributions { singleValueContributions += listOf(1.0, 2.0, 3.0) }), - Pair("pk2", privacyIdContributions { singleValueContributions += listOf(4.0, 5.0, 6.0) }), - Pair("pk1", privacyIdContributions { singleValueContributions += listOf(7.0) }), + Pair( + "pk1", + privacyIdContributions { + features += featureContribution { + featureId = "" + singleValueContributions += listOf(1.0, 2.0, 3.0) + } + }, + ), + Pair( + "pk2", + privacyIdContributions { + features += featureContribution { + featureId = "" + singleValueContributions += listOf(4.0, 5.0, 6.0) + } + }, + ), + Pair( + "pk1", + privacyIdContributions { + features += featureContribution { + featureId = "" + singleValueContributions += listOf(7.0) + } + }, + ), ) } @@ -165,7 +192,16 @@ class PerPartitionContributionsSamplerTest { // Returned partition keys should only have 1 contribution from each privacy ID. for (pk in returnedPks) { - assertThat(sampledData.data.toMap().getValue(pk).singleValueContributionsList).hasSize(1) + assertThat( + sampledData.data + .toMap() + .getValue(pk) + .featuresList + .find { it.featureId == "" }!! + .singleValueContributionsList + .size + ) + .isEqualTo(1) } } @@ -195,15 +231,21 @@ class PerPartitionContributionsSamplerTest { LOCAL_EF, ) .sampleContributions(inputData) as LocalTable - val returnedContributions = sampledData.data.toMap().getValue("pk").singleValueContributionsList + val returnedContributions = + sampledData.data + .toMap() + .getValue("pk") + .featuresList + .find { it.featureId == "" }!! + .singleValueContributionsList - assertThat(returnedContributions.count()).isEqualTo(300) + assertThat(returnedContributions.size).isEqualTo(300) } private companion object { val aggParams = AggregationParams( - metrics = ImmutableList.of(MetricDefinition(MEAN)), + nonFeatureMetrics = ImmutableList.of(MetricDefinition(MEAN)), noiseKind = GAUSSIAN, maxPartitionsContributed = 1, maxContributionsPerPartition = 2, diff --git a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/PostAggregationPartitionSelectionCombinerTest.kt b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/PostAggregationPartitionSelectionCombinerTest.kt index 12f0b745..179fec66 100644 --- a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/PostAggregationPartitionSelectionCombinerTest.kt +++ b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/PostAggregationPartitionSelectionCombinerTest.kt @@ -23,6 +23,7 @@ import com.google.privacy.differentialprivacy.Noise import com.google.privacy.differentialprivacy.pipelinedp4j.core.NoiseKind.GAUSSIAN import com.google.privacy.differentialprivacy.pipelinedp4j.core.budget.AllocatedBudget import com.google.privacy.differentialprivacy.pipelinedp4j.dplibrary.NoiseFactory +import com.google.privacy.differentialprivacy.pipelinedp4j.proto.PrivacyIdContributionsKt.featureContribution import com.google.privacy.differentialprivacy.pipelinedp4j.proto.PrivacyIdContributionsKt.multiValueContribution import com.google.privacy.differentialprivacy.pipelinedp4j.proto.privacyIdContributions import com.google.privacy.differentialprivacy.pipelinedp4j.proto.privacyIdCountAccumulator @@ -50,7 +51,9 @@ class PostAggregationPartitionSelectionCombinerTest { val accumulator = combiner.createAccumulator( - privacyIdContributions { singleValueContributions += listOf(1.0, 1.0, 1.0) } + privacyIdContributions { + features += featureContribution { singleValueContributions += listOf(1.0, 1.0, 1.0) } + } ) assertThat(accumulator).isEqualTo(privacyIdCountAccumulator { count = 1 }) @@ -70,12 +73,14 @@ class PostAggregationPartitionSelectionCombinerTest { val accumulator = combiner.createAccumulator( privacyIdContributions { - multiValueContributions += - listOf( - multiValueContribution { values += listOf(1.0, 1.0, 1.0) }, - multiValueContribution { values += listOf(2.0, 2.0, 2.0) }, - multiValueContribution { values += listOf(3.0, 3.0, 3.0) }, - ) + features += featureContribution { + multiValueContributions += + listOf( + multiValueContribution { values += listOf(1.0, 1.0, 1.0) }, + multiValueContribution { values += listOf(2.0, 2.0, 2.0) }, + multiValueContribution { values += listOf(3.0, 3.0, 3.0) }, + ) + } } ) @@ -200,7 +205,7 @@ class PostAggregationPartitionSelectionCombinerTest { companion object { private val AGGREGATION_PARAMS = AggregationParams( - metrics = ImmutableList.of(MetricDefinition(MetricType.PRIVACY_ID_COUNT)), + nonFeatureMetrics = ImmutableList.of(MetricDefinition(MetricType.PRIVACY_ID_COUNT)), noiseKind = GAUSSIAN, maxPartitionsContributed = 3, maxContributionsPerPartition = 5, diff --git a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/PrivacyIdCombinerTest.kt b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/PrivacyIdCombinerTest.kt index 2b70c0f7..76b4c954 100644 --- a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/PrivacyIdCombinerTest.kt +++ b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/PrivacyIdCombinerTest.kt @@ -24,6 +24,7 @@ import com.google.privacy.differentialprivacy.pipelinedp4j.core.MetricType.PRIVA import com.google.privacy.differentialprivacy.pipelinedp4j.core.NoiseKind.GAUSSIAN import com.google.privacy.differentialprivacy.pipelinedp4j.core.budget.AllocatedBudget import com.google.privacy.differentialprivacy.pipelinedp4j.dplibrary.NoiseFactory +import com.google.privacy.differentialprivacy.pipelinedp4j.proto.PrivacyIdContributionsKt.featureContribution import com.google.privacy.differentialprivacy.pipelinedp4j.proto.PrivacyIdContributionsKt.multiValueContribution import com.google.privacy.differentialprivacy.pipelinedp4j.proto.privacyIdContributions import com.google.privacy.differentialprivacy.pipelinedp4j.proto.privacyIdCountAccumulator @@ -40,7 +41,7 @@ class PrivacyIdCombinerTest { companion object { private val AGG_PARAMS = AggregationParams( - metrics = ImmutableList.of(MetricDefinition(PRIVACY_ID_COUNT)), + nonFeatureMetrics = ImmutableList.of(MetricDefinition(PRIVACY_ID_COUNT)), noiseKind = GAUSSIAN, maxPartitionsContributed = 3, maxContributionsPerPartition = 5, @@ -69,7 +70,9 @@ class PrivacyIdCombinerTest { val accumulator = combiner.createAccumulator( - privacyIdContributions { singleValueContributions += listOf(1.0, 1.0, 1.0) } + privacyIdContributions { + features += featureContribution { singleValueContributions += listOf(1.0, 1.0, 1.0) } + } ) assertThat(accumulator).isEqualTo(privacyIdCountAccumulator { count = 1 }) @@ -88,12 +91,14 @@ class PrivacyIdCombinerTest { val accumulator = combiner.createAccumulator( privacyIdContributions { - multiValueContributions += - listOf( - multiValueContribution { values += listOf(1.0, 1.0, 1.0) }, - multiValueContribution { values += listOf(2.0, 2.0, 2.0) }, - multiValueContribution { values += listOf(3.0, 3.0, 3.0) }, - ) + features += featureContribution { + multiValueContributions += + listOf( + multiValueContribution { values += listOf(1.0, 1.0, 1.0) }, + multiValueContribution { values += listOf(2.0, 2.0, 2.0) }, + multiValueContribution { values += listOf(3.0, 3.0, 3.0) }, + ) + } } ) diff --git a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/PrivatePartitionsComputationalGraphTest.kt b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/PrivatePartitionsComputationalGraphTest.kt index fd27f4ed..86f16b10 100644 --- a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/PrivatePartitionsComputationalGraphTest.kt +++ b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/PrivatePartitionsComputationalGraphTest.kt @@ -110,10 +110,10 @@ class PrivatePartitionsComputationalGraphTest { verify(partitionSelectorMock).shouldKeep(2) // "partition2" assertThat(dpAggregates.keys).containsExactly("partition1", "partition2") - assertThat(dpAggregates.get("partition1")!!.count).isEqualTo(2.0) - assertThat(dpAggregates.get("partition1")!!.sum).isEqualTo(5.0) - assertThat(dpAggregates.get("partition2")!!.sum).isEqualTo(2.0) - assertThat(dpAggregates.get("partition2")!!.privacyIdCount).isEqualTo(1.0) + assertThat(dpAggregates["partition1"]!!.count).isEqualTo(2.0) + assertThat(dpAggregates["partition1"]!!.perFeatureList.first().sum).isEqualTo(5.0) + assertThat(dpAggregates["partition2"]!!.perFeatureList.first().sum).isEqualTo(2.0) + assertThat(dpAggregates["partition2"]!!.privacyIdCount).isEqualTo(1.0) } @Test @@ -237,7 +237,8 @@ class PrivatePartitionsComputationalGraphTest { // The user contributed to 3 partitions but maxPartitionsContributed is set to 2. Hence, // contributions to 2 partitions should appear in the result. assertThat(dpAggregates.values.map { it.count }).containsExactly(1.0, 1.0) - assertThat(dpAggregates.values.map { it.sum }).containsExactly(10.0, 10.0) + assertThat(dpAggregates.values.map { it.perFeatureList.first().sum }) + .containsExactly(10.0, 10.0) } @Test @@ -294,24 +295,27 @@ class PrivatePartitionsComputationalGraphTest { private companion object { val PRIVACY_ID_COUNT_PARAMS = AggregationParams( - metrics = ImmutableList.of(MetricDefinition(PRIVACY_ID_COUNT)), + nonFeatureMetrics = ImmutableList.of(MetricDefinition(PRIVACY_ID_COUNT)), noiseKind = GAUSSIAN, maxPartitionsContributed = 10, maxContributionsPerPartition = 5, ) val COUNT_SUM_AND_ID_COUNT_PARAMS = AggregationParams( - metrics = + nonFeatureMetrics = + ImmutableList.of(MetricDefinition(COUNT), MetricDefinition(PRIVACY_ID_COUNT)), + features = ImmutableList.of( - MetricDefinition(COUNT), - MetricDefinition(SUM), - MetricDefinition(PRIVACY_ID_COUNT), + ScalarFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(SUM)), + minTotalValue = -100.0, + maxTotalValue = 100.0, + ) ), noiseKind = GAUSSIAN, maxPartitionsContributed = 100, maxContributionsPerPartition = 100, - minTotalValue = -100.0, - maxTotalValue = 100.0, ) val METRICS_ALLOCATED_BUDGET = AllocatedBudget().apply { initialize(1.1, 1e-3) } // High epsilon/delta for partition selection. Partitions with ~10 privacy unit have ~1 @@ -334,6 +338,7 @@ class PrivatePartitionsComputationalGraphTest { METRICS_ALLOCATED_BUDGET, ZeroNoiseFactory(), ExecutionMode.PRODUCTION, + COUNT_SUM_AND_ID_COUNT_PARAMS.features[0] as ScalarFeatureSpec, ), PrivacyIdCountCombiner( COUNT_SUM_AND_ID_COUNT_PARAMS, diff --git a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/PrivatePartitionsTest.kt b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/PrivatePartitionsTest.kt index e272bf7e..7d5cafb0 100644 --- a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/PrivatePartitionsTest.kt +++ b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/PrivatePartitionsTest.kt @@ -39,7 +39,7 @@ class PrivatePartitionsTest { companion object { private val AGG_PARAMS = AggregationParams( - metrics = ImmutableList.of(MetricDefinition(PRIVACY_ID_COUNT)), + nonFeatureMetrics = ImmutableList.of(MetricDefinition(PRIVACY_ID_COUNT)), noiseKind = GAUSSIAN, maxPartitionsContributed = 3, maxContributionsPerPartition = 5, diff --git a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/PublicPartitionsComputationalGraphTest.kt b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/PublicPartitionsComputationalGraphTest.kt index 740a725f..cb4ce87d 100644 --- a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/PublicPartitionsComputationalGraphTest.kt +++ b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/PublicPartitionsComputationalGraphTest.kt @@ -30,6 +30,7 @@ import com.google.privacy.differentialprivacy.pipelinedp4j.local.LocalEncoderFac import com.google.privacy.differentialprivacy.pipelinedp4j.local.LocalTable import com.google.privacy.differentialprivacy.pipelinedp4j.proto.DpAggregates import com.google.privacy.differentialprivacy.pipelinedp4j.proto.dpAggregates +import com.google.privacy.differentialprivacy.pipelinedp4j.proto.perFeature import com.google.testing.junit.testparameterinjector.TestParameterInjector import com.google.testing.junit.testparameterinjector.TestParameters import com.google.testing.junit.testparameterinjector.TestParametersValuesProvider @@ -41,38 +42,48 @@ class PublicPartitionsComputationalGraphTest { companion object { private val COUNT_PARAMS = AggregationParams( - metrics = ImmutableList.of(MetricDefinition(COUNT)), + nonFeatureMetrics = ImmutableList.of(MetricDefinition(COUNT)), noiseKind = GAUSSIAN, maxPartitionsContributed = Int.MAX_VALUE, maxContributionsPerPartition = Int.MAX_VALUE, ) private val PRIVACY_ID_COUNT_PARAMS = AggregationParams( - metrics = ImmutableList.of(MetricDefinition(PRIVACY_ID_COUNT)), + nonFeatureMetrics = ImmutableList.of(MetricDefinition(PRIVACY_ID_COUNT)), noiseKind = GAUSSIAN, maxPartitionsContributed = Int.MAX_VALUE, ) private val SUM_PARAMS = AggregationParams( - metrics = ImmutableList.of(MetricDefinition(SUM)), + nonFeatureMetrics = ImmutableList.of(), + features = + ImmutableList.of( + ScalarFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(SUM)), + minTotalValue = -Double.MAX_VALUE, + maxTotalValue = Double.MAX_VALUE, + ) + ), noiseKind = GAUSSIAN, maxPartitionsContributed = Int.MAX_VALUE, - minTotalValue = -Double.MAX_VALUE, - maxTotalValue = Double.MAX_VALUE, ) private val COUNT_SUM_AND_ID_COUNT_PARAMS = AggregationParams( - metrics = + nonFeatureMetrics = + ImmutableList.of(MetricDefinition(COUNT), MetricDefinition(PRIVACY_ID_COUNT)), + features = ImmutableList.of( - MetricDefinition(COUNT), - MetricDefinition(SUM), - MetricDefinition(PRIVACY_ID_COUNT), + ScalarFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(SUM)), + minTotalValue = -100.0, + maxTotalValue = 100.0, + ) ), noiseKind = GAUSSIAN, maxPartitionsContributed = 100, maxContributionsPerPartition = 100, - minTotalValue = -100.0, - maxTotalValue = 100.0, ) private val ALLOCATED_BUDGET = AllocatedBudget() @@ -95,6 +106,7 @@ class PublicPartitionsComputationalGraphTest { ALLOCATED_BUDGET, ZeroNoiseFactory(), ExecutionMode.PRODUCTION, + COUNT_SUM_AND_ID_COUNT_PARAMS.features[0] as ScalarFeatureSpec, ), PrivacyIdCountCombiner( COUNT_SUM_AND_ID_COUNT_PARAMS, @@ -137,9 +149,10 @@ class PublicPartitionsComputationalGraphTest { assertThat(dpAggregates.data.toMap().keys) .containsExactly("public_present_in_data", "public_not_present_in_data") // Check that the value corresponding to the public partition not present in data is noisy - assertThat(dpAggregates.data.toMap().get("public_present_in_data")!!.count).isNotEqualTo(0.0) - assertThat(dpAggregates.data.toMap().get("public_present_in_data")!!.sum).isNotEqualTo(0.0) - assertThat(dpAggregates.data.toMap().get("public_present_in_data")!!.privacyIdCount) + assertThat(dpAggregates.data.toMap()["public_present_in_data"]!!.count).isNotEqualTo(0.0) + assertThat(dpAggregates.data.toMap()["public_present_in_data"]!!.perFeatureList.first().sum) + .isNotEqualTo(0.0) + assertThat(dpAggregates.data.toMap()["public_present_in_data"]!!.privacyIdCount) .isNotEqualTo(0.0) } @@ -196,6 +209,7 @@ class PublicPartitionsComputationalGraphTest { ALLOCATED_BUDGET, ZeroNoiseFactory(), ExecutionMode.PRODUCTION, + SUM_PARAMS.features[0] as ScalarFeatureSpec, ) ) ), @@ -214,9 +228,33 @@ class PublicPartitionsComputationalGraphTest { .addParameter( "expectedResult", arrayOf( - Pair("red", dpAggregates { sum = 20.0 }), - Pair("green", dpAggregates { sum = 20.0 }), - Pair("blue", dpAggregates { sum = 10.0 }), + Pair( + "red", + dpAggregates { + perFeature += perFeature { + sum = 20.0 + featureId = "value" + } + }, + ), + Pair( + "green", + dpAggregates { + perFeature += perFeature { + sum = 20.0 + featureId = "value" + } + }, + ), + Pair( + "blue", + dpAggregates { + perFeature += perFeature { + sum = 10.0 + featureId = "value" + } + }, + ), ), ) .build(), @@ -278,24 +316,33 @@ class PublicPartitionsComputationalGraphTest { "red", dpAggregates { count = 2.0 - sum = 20.0 privacyIdCount = 1.0 + perFeature += perFeature { + sum = 20.0 + featureId = "value" + } }, ), Pair( "green", dpAggregates { count = 2.0 - sum = 20.0 privacyIdCount = 2.0 + perFeature += perFeature { + sum = 20.0 + featureId = "value" + } }, ), Pair( "blue", dpAggregates { count = 1.0 - sum = 10.0 privacyIdCount = 1.0 + perFeature += perFeature { + sum = 10.0 + featureId = "value" + } }, ), ), @@ -366,7 +413,8 @@ class PublicPartitionsComputationalGraphTest { // The user contributed to 3 partitions but maxPartitionsContributed is set to 2. Hence, // contributions to 2 partitions should appear in the result. assertThat(dpAggregates.data.toMap().values.map { it.count }).containsExactly(1.0, 1.0, 0.0) - assertThat(dpAggregates.data.toMap().values.map { it.sum }).containsExactly(10.0, 10.0, 0.0) + assertThat(dpAggregates.data.toMap().values.map { it.perFeatureList.firstOrNull()?.sum ?: 0.0 }) + .containsExactly(10.0, 10.0, 0.0) assertThat(dpAggregates.data.toMap().values.map { it.privacyIdCount }) .containsExactly(1.0, 1.0, 0.0) } diff --git a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/QuantilesCombinerTest.kt b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/QuantilesCombinerTest.kt index a810ef35..85a6bec6 100644 --- a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/QuantilesCombinerTest.kt +++ b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/QuantilesCombinerTest.kt @@ -22,6 +22,7 @@ import com.google.privacy.differentialprivacy.pipelinedp4j.core.NoiseKind.GAUSSI import com.google.privacy.differentialprivacy.pipelinedp4j.core.budget.AllocatedBudget import com.google.privacy.differentialprivacy.pipelinedp4j.dplibrary.NoiseFactory import com.google.privacy.differentialprivacy.pipelinedp4j.dplibrary.ZeroNoiseFactory +import com.google.privacy.differentialprivacy.pipelinedp4j.proto.PrivacyIdContributionsKt.featureContribution import com.google.privacy.differentialprivacy.pipelinedp4j.proto.privacyIdContributions import com.google.testing.junit.testparameterinjector.TestParameterInjector import com.google.testing.junit.testparameterinjector.TestParameters @@ -32,12 +33,12 @@ import org.junit.runner.RunWith class QuantilesCombinerTest { private fun defaultQuantilesAggParams() = AggregationParams( - metrics = ImmutableList.of(), + nonFeatureMetrics = ImmutableList.of(), + features = + ImmutableList.of(ScalarFeatureSpec("value", ImmutableList.of(), -10000.0, 10000.0)), noiseKind = GAUSSIAN, maxPartitionsContributed = 1, maxContributionsPerPartition = 1, - minValue = -10000.0, - maxValue = 10000.0, ) @Test @@ -51,19 +52,37 @@ class QuantilesCombinerTest { allocatedBudget, ZeroNoiseFactory(), ExecutionMode.PRODUCTION, + defaultQuantilesAggParams().features[0] as ScalarFeatureSpec, ) val accumulator0 = combiner.emptyAccumulator() val accumulator1 = combiner.createAccumulator( - privacyIdContributions { singleValueContributions += listOf(1.0, 3.0) } + privacyIdContributions { + features += featureContribution { + featureId = "value" + singleValueContributions += listOf(1.0, 3.0) + } + } ) val accumulator2 = combiner.createAccumulator( - privacyIdContributions { singleValueContributions += listOf(2.0, 4.0) } + privacyIdContributions { + features += featureContribution { + featureId = "value" + singleValueContributions += listOf(2.0, 4.0) + } + } ) val accumulator3 = - combiner.createAccumulator(privacyIdContributions { singleValueContributions += listOf(5.0) }) + combiner.createAccumulator( + privacyIdContributions { + features += featureContribution { + featureId = "value" + singleValueContributions += listOf(5.0) + } + } + ) val accumulator01 = combiner.mergeAccumulators(accumulator0, accumulator1) val accumulator012 = combiner.mergeAccumulators(accumulator01, accumulator2) val accumulator0123 = combiner.mergeAccumulators(accumulator3, accumulator012) @@ -79,13 +98,19 @@ class QuantilesCombinerTest { fun computeMetrics_noNoise_onlyEmptyAccumulator_returnsQuantilesBetweenMinMaxValues() { val allocatedBudget = AllocatedBudget() allocatedBudget.initialize(1.1, 1e-5) + val params = + defaultQuantilesAggParams() + .copy( + features = ImmutableList.of(ScalarFeatureSpec("value", ImmutableList.of(), -10.0, 10.0)) + ) val combiner = QuantilesCombiner( sortedRanks = listOf(0.0, 0.5, 1.0), - defaultQuantilesAggParams().copy(minValue = -10.0, maxValue = 10.0), + params, allocatedBudget, ZeroNoiseFactory(), ExecutionMode.PRODUCTION, + params.features[0] as ScalarFeatureSpec, ) val quantiles = combiner.computeMetrics(combiner.emptyAccumulator()) @@ -101,19 +126,29 @@ class QuantilesCombinerTest { fun computeMetrics_smallNoise_returnsQuantilesCloseToReal(noiseKind: NoiseKind, delta: Double) { val allocatedBudget = AllocatedBudget() allocatedBudget.initialize(100.0, delta) + val params = + defaultQuantilesAggParams() + .copy( + features = ImmutableList.of(ScalarFeatureSpec("value", ImmutableList.of(), 1.0, 1000.0)), + noiseKind = noiseKind, + ) val combiner = QuantilesCombiner( sortedRanks = listOf(0.0, 0.5, 1.0), - defaultQuantilesAggParams().copy(minValue = 1.0, maxValue = 1000.0, noiseKind = noiseKind), + params, allocatedBudget, NoiseFactory(), ExecutionMode.PRODUCTION, + params.features[0] as ScalarFeatureSpec, ) val accumulator = combiner.createAccumulator( privacyIdContributions { - singleValueContributions += (1..1000).map { it.toDouble() }.toList() + features += featureContribution { + featureId = "value" + singleValueContributions += (1..1000).map { it.toDouble() }.toList() + } } ) val quantiles = combiner.computeMetrics(accumulator) diff --git a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/SumCombinerTest.kt b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/SumCombinerTest.kt index fe8420d0..06725c49 100644 --- a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/SumCombinerTest.kt +++ b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/SumCombinerTest.kt @@ -26,6 +26,7 @@ import com.google.privacy.differentialprivacy.pipelinedp4j.core.NoiseKind.GAUSSI import com.google.privacy.differentialprivacy.pipelinedp4j.core.budget.AllocatedBudget import com.google.privacy.differentialprivacy.pipelinedp4j.dplibrary.NoiseFactory import com.google.privacy.differentialprivacy.pipelinedp4j.dplibrary.ZeroNoiseFactory +import com.google.privacy.differentialprivacy.pipelinedp4j.proto.PrivacyIdContributionsKt.featureContribution import com.google.privacy.differentialprivacy.pipelinedp4j.proto.privacyIdContributions import com.google.privacy.differentialprivacy.pipelinedp4j.proto.sumAccumulator import com.google.testing.junit.testparameterinjector.TestParameterInjector @@ -39,10 +40,17 @@ import org.mockito.kotlin.verify class SumCombinerTest { private val SUM_AGG_PARAMS = AggregationParams( - metrics = ImmutableList.of(MetricDefinition(SUM)), + nonFeatureMetrics = ImmutableList.of(), + features = + ImmutableList.of( + ScalarFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(SUM)), + minTotalValue = -1.0, + maxTotalValue = 3.0, + ) + ), noiseKind = GAUSSIAN, - minTotalValue = -1.0, - maxTotalValue = 3.0, maxPartitionsContributed = 5, ) @@ -56,81 +64,166 @@ class SumCombinerTest { @Test fun emptyAccumulator_minIsGreaterThanZero_returnsZeroAndIgnoresContributionBounds() { + val params = + SUM_AGG_PARAMS.copy( + features = + ImmutableList.of( + ScalarFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(SUM)), + minTotalValue = 1.0, + maxTotalValue = 2.0, + ) + ) + ) val combiner = SumCombiner( - SUM_AGG_PARAMS.copy(minTotalValue = 1.0, maxTotalValue = 2.0), + params, UNUSED_ALLOCATED_BUDGET, NoiseFactory(), ExecutionMode.PRODUCTION, + params.features[0] as ScalarFeatureSpec, ) val accumulator = combiner.emptyAccumulator() - assertThat(accumulator).isEqualTo(sumAccumulator { sum = 0.0 }) + assertThat(accumulator) + .isEqualTo( + sumAccumulator { + featureId = "value" + sum = 0.0 + } + ) } @Test fun createAccumulator_sumsItems() { + val params = + SUM_AGG_PARAMS.copy( + features = + ImmutableList.of( + ScalarFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(SUM)), + minTotalValue = -300.0, + maxTotalValue = 300.0, + ) + ) + ) val combiner = SumCombiner( - SUM_AGG_PARAMS.copy(minTotalValue = -300.0, maxTotalValue = 300.0), + params, UNUSED_ALLOCATED_BUDGET, NoiseFactory(), ExecutionMode.TEST_MODE_WITH_CONTRIBUTION_BOUNDING, + params.features[0] as ScalarFeatureSpec, ) val accumulator = combiner.createAccumulator( - privacyIdContributions { singleValueContributions += listOf(-10.0, 15.0, 0.0) } + privacyIdContributions { + features += featureContribution { + featureId = "value" + singleValueContributions += listOf(-10.0, 15.0, 0.0) + } + } ) - assertThat(accumulator).isEqualTo(sumAccumulator { sum = 5.0 }) + assertThat(accumulator) + .isEqualTo( + sumAccumulator { + featureId = "value" + sum = 5.0 + } + ) } @Test fun createAccumulator_privacyLevelWithContributionBounding_clampsOnlyTotalSum() { + val params = + SUM_AGG_PARAMS.copy( + features = + ImmutableList.of( + ScalarFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(SUM)), + minValue = -1.0, + maxValue = 4.0, + minTotalValue = -2.0, + maxTotalValue = 300.0, + ) + ) + ) val combiner = SumCombiner( - SUM_AGG_PARAMS.copy( - minValue = -1.0, - maxValue = 4.0, - minTotalValue = -2.0, - maxTotalValue = 300.0, - ), + params, UNUSED_ALLOCATED_BUDGET, NoiseFactory(), ExecutionMode.TEST_MODE_WITH_CONTRIBUTION_BOUNDING, + params.features[0] as ScalarFeatureSpec, ) val accumulator = combiner.createAccumulator( - privacyIdContributions { singleValueContributions += listOf(-1000.0, 1000.0, 500.0) } + privacyIdContributions { + features += featureContribution { + featureId = "value" + singleValueContributions += listOf(-1000.0, 1000.0, 500.0) + } + } ) - assertThat(accumulator).isEqualTo(sumAccumulator { sum = 300.0 }) + assertThat(accumulator) + .isEqualTo( + sumAccumulator { + featureId = "value" + sum = 300.0 + } + ) } @Test fun createAccumulator_fullTestMode_doesNotClampTotalSum() { + val params = + SUM_AGG_PARAMS.copy( + features = + ImmutableList.of( + ScalarFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(SUM)), + minValue = -1.0, + maxValue = 4.0, + minTotalValue = -2.0, + maxTotalValue = 300.0, + ) + ) + ) val combiner = SumCombiner( - SUM_AGG_PARAMS.copy( - minValue = -1.0, - maxValue = 4.0, - minTotalValue = -2.0, - maxTotalValue = 300.0, - ), + params, UNUSED_ALLOCATED_BUDGET, NoiseFactory(), FULL_TEST_MODE, + params.features[0] as ScalarFeatureSpec, ) val accumulator = combiner.createAccumulator( - privacyIdContributions { singleValueContributions += listOf(-1000.0, 1000.0, 500.0) } + privacyIdContributions { + features += featureContribution { + featureId = "value" + singleValueContributions += listOf(-1000.0, 1000.0, 500.0) + } + } ) - assertThat(accumulator).isEqualTo(sumAccumulator { sum = 500.0 }) + assertThat(accumulator) + .isEqualTo( + sumAccumulator { + featureId = "value" + sum = 500.0 + } + ) } @Test @@ -141,12 +234,28 @@ class SumCombinerTest { UNUSED_ALLOCATED_BUDGET, NoiseFactory(), ExecutionMode.TEST_MODE_WITH_CONTRIBUTION_BOUNDING, + SUM_AGG_PARAMS.features[0] as ScalarFeatureSpec, ) val accumulator = - combiner.mergeAccumulators(sumAccumulator { sum = 1000.0 }, sumAccumulator { sum = -2000.0 }) + combiner.mergeAccumulators( + sumAccumulator { + featureId = "value" + sum = 1000.0 + }, + sumAccumulator { + featureId = "value" + sum = -2000.0 + }, + ) - assertThat(accumulator).isEqualTo(sumAccumulator { sum = -1000.0 }) + assertThat(accumulator) + .isEqualTo( + sumAccumulator { + featureId = "value" + sum = -1000.0 + } + ) } @Test @@ -160,6 +269,7 @@ class SumCombinerTest { allocatedBudget, NoiseFactory(), ExecutionMode.PRODUCTION, + SUM_AGG_PARAMS.features[0] as ScalarFeatureSpec, ) val result = combiner.computeMetrics(sumAccumulator { sum = 1.0 }) @@ -171,17 +281,27 @@ class SumCombinerTest { fun computeMetrics_passesCorrectParametersToNoise() { val allocatedBudget = AllocatedBudget() allocatedBudget.initialize(1.1, 1e-3) + val params = + SUM_AGG_PARAMS.copy( + features = + ImmutableList.of( + ScalarFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(SUM)), + minTotalValue = -4.0, + maxTotalValue = 3.0, + ) + ), + noiseKind = GAUSSIAN, + maxPartitionsContributed = 10, + ) val combiner = SumCombiner( - SUM_AGG_PARAMS.copy( - noiseKind = GAUSSIAN, - maxPartitionsContributed = 10, - minTotalValue = -4.0, - maxTotalValue = 3.0, - ), + params, allocatedBudget, noiseFactoryMock, ExecutionMode.TEST_MODE_WITH_CONTRIBUTION_BOUNDING, + params.features[0] as ScalarFeatureSpec, ) val unused = combiner.computeMetrics(sumAccumulator { sum = 1.0 }) @@ -200,22 +320,45 @@ class SumCombinerTest { fun computeMetrics_withoutNoise_withMultipleContributionsIncludingEmptyAccumulator_returnsCorrectResult() { val allocatedBudget = AllocatedBudget() allocatedBudget.initialize(1.1, 1e-5) + val params = + SUM_AGG_PARAMS.copy( + features = + ImmutableList.of( + ScalarFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(SUM)), + minTotalValue = -1.0, + maxTotalValue = 3.0, + ) + ) + ) val combiner = SumCombiner( - SUM_AGG_PARAMS.copy(minTotalValue = -1.0, maxTotalValue = 3.0), + params, allocatedBudget, ZeroNoiseFactory(), ExecutionMode.TEST_MODE_WITH_CONTRIBUTION_BOUNDING, + params.features[0] as ScalarFeatureSpec, ) val accumulator0 = combiner.emptyAccumulator() val accumulator1 = combiner.createAccumulator( - privacyIdContributions { singleValueContributions += listOf(-2.0, 3.0) } + privacyIdContributions { + features += featureContribution { + featureId = "value" + singleValueContributions += listOf(-2.0, 3.0) + } + } ) val accumulator2 = combiner.createAccumulator( - privacyIdContributions { singleValueContributions += listOf(4.0, -1.0) } + privacyIdContributions { + features += featureContribution { + featureId = "value" + singleValueContributions += listOf(4.0, -1.0) + } + } ) val accumulator3 = combiner.mergeAccumulators(accumulator0, accumulator1) val finalAccumulator = combiner.mergeAccumulators(accumulator2, accumulator3) @@ -228,12 +371,25 @@ class SumCombinerTest { fun computeMetrics_withoutNoiseAndEmptyAccumulatorThenMerged_returnsZeroSum() { val allocatedBudget = AllocatedBudget() allocatedBudget.initialize(1.1, 1e-5) + val params = + SUM_AGG_PARAMS.copy( + features = + ImmutableList.of( + ScalarFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(SUM)), + minTotalValue = -1.0, + maxTotalValue = 3.0, + ) + ) + ) val combiner = SumCombiner( - SUM_AGG_PARAMS.copy(minTotalValue = -1.0, maxTotalValue = 3.0), + params, allocatedBudget, ZeroNoiseFactory(), ExecutionMode.TEST_MODE_WITH_CONTRIBUTION_BOUNDING, + params.features[0] as ScalarFeatureSpec, ) val result = combiner.computeMetrics(combiner.emptyAccumulator()) diff --git a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/VarianceCombinerTest.kt b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/VarianceCombinerTest.kt index d5970145..ec514df2 100644 --- a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/VarianceCombinerTest.kt +++ b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/VarianceCombinerTest.kt @@ -28,6 +28,7 @@ import com.google.privacy.differentialprivacy.pipelinedp4j.core.MetricType.VARIA import com.google.privacy.differentialprivacy.pipelinedp4j.core.budget.AllocatedBudget import com.google.privacy.differentialprivacy.pipelinedp4j.dplibrary.NoiseFactory import com.google.privacy.differentialprivacy.pipelinedp4j.dplibrary.ZeroNoiseFactory +import com.google.privacy.differentialprivacy.pipelinedp4j.proto.PrivacyIdContributionsKt.featureContribution import com.google.privacy.differentialprivacy.pipelinedp4j.proto.privacyIdContributions import com.google.privacy.differentialprivacy.pipelinedp4j.proto.varianceAccumulator import com.google.testing.junit.testparameterinjector.TestParameter @@ -50,6 +51,7 @@ class VarianceCombinerTest { UNUSED_ALLOCATED_BUDGET, NoiseFactory(), ExecutionMode.PRODUCTION, + AGG_PARAMS.features[0] as ScalarFeatureSpec, ) val accumulator = combiner.emptyAccumulator() @@ -57,6 +59,7 @@ class VarianceCombinerTest { assertThat(accumulator) .isEqualTo( varianceAccumulator { + featureId = "value" count = 0 normalizedSum = 0.0 normalizedSumSquares = 0.0 @@ -66,22 +69,43 @@ class VarianceCombinerTest { @Test fun createAccumulator_doesNotClampContributionsWithinBounds() { + val params = + AGG_PARAMS.copy( + features = + ImmutableList.of( + ScalarFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(VARIANCE)), + minValue = -8.0, + maxValue = 12.0, + ) + ) + ) val combiner = VarianceCombiner( - AGG_PARAMS.copy(minValue = -8.0, maxValue = 12.0), + params, UNUSED_ALLOCATED_BUDGET, UNUSED_ALLOCATED_BUDGET, UNUSED_ALLOCATED_BUDGET, NoiseFactory(), ExecutionMode.PRODUCTION, + params.features[0] as ScalarFeatureSpec, ) val accumulator = - combiner.createAccumulator(privacyIdContributions { singleValueContributions += listOf(5.5) }) + combiner.createAccumulator( + privacyIdContributions { + features += featureContribution { + featureId = "value" + singleValueContributions += listOf(5.5) + } + } + ) // midValue is the midpoint between minValue = -8.0 and maxValue = 12.0 = 2 assertThat(accumulator) .isEqualTo( varianceAccumulator { + featureId = "value" count = 1 normalizedSum = 3.5 // = 5.5 - 2.0 = contribution - midValue normalizedSumSquares = 12.25 // (5.5 - 2.0)^2 = (contribution - midValue)^2 @@ -91,24 +115,43 @@ class VarianceCombinerTest { @Test fun createAccumulator_privacyLevelWithContributionBounding_clampssingleValueContributions() { + val params = + AGG_PARAMS.copy( + features = + ImmutableList.of( + ScalarFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(VARIANCE)), + minValue = -10.0, + maxValue = 10.0, + ) + ) + ) val combiner = VarianceCombiner( - AGG_PARAMS.copy(minValue = -10.0, maxValue = 10.0), + params, UNUSED_ALLOCATED_BUDGET, UNUSED_ALLOCATED_BUDGET, UNUSED_ALLOCATED_BUDGET, NoiseFactory(), ExecutionMode.PRODUCTION, + params.features[0] as ScalarFeatureSpec, ) val accumulator = combiner.createAccumulator( - privacyIdContributions { singleValueContributions += listOf(-20.0, 30.0) } + privacyIdContributions { + features += featureContribution { + featureId = "value" + singleValueContributions += listOf(-20.0, 30.0) + } + } ) // midValue is the midpoint between minValue = -10.0 and maxValue = 10.0 = 0 assertThat(accumulator) .isEqualTo( varianceAccumulator { + featureId = "value" count = 2 normalizedSum = 0.0 // (-10.0 - 0) + (10.0 - 0) = two clamped contributions minus midValue normalizedSumSquares = @@ -119,24 +162,43 @@ class VarianceCombinerTest { @Test fun createAccumulator_fullTestMode_doesNotClampSingleValueContributions() { + val params = + AGG_PARAMS.copy( + features = + ImmutableList.of( + ScalarFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(VARIANCE)), + minValue = -10.0, + maxValue = 10.0, + ) + ) + ) val combiner = VarianceCombiner( - AGG_PARAMS.copy(minValue = -10.0, maxValue = 10.0), + params, UNUSED_ALLOCATED_BUDGET, UNUSED_ALLOCATED_BUDGET, UNUSED_ALLOCATED_BUDGET, NoiseFactory(), FULL_TEST_MODE, + params.features[0] as ScalarFeatureSpec, ) val accumulator = combiner.createAccumulator( - privacyIdContributions { singleValueContributions += listOf(-20.0, 30.0) } + privacyIdContributions { + features += featureContribution { + featureId = "value" + singleValueContributions += listOf(-20.0, 30.0) + } + } ) // midValue is the midpoint between minValue = -10.0 and maxValue = 10.0 = 0 assertThat(accumulator) .isEqualTo( varianceAccumulator { + featureId = "value" count = 2 normalizedSum = 10.0 // (-20.0 - 0.0) + (30.0 - 0.0) Not clamped normalizedSumSquares = 1300.0 // (-20.0 - 0.0)^2 + (30.0 - 0.0)^2 Not clamped @@ -146,22 +208,43 @@ class VarianceCombinerTest { @Test fun createAccumulator_normalizesSumAndSumOfSquares() { + val params = + AGG_PARAMS.copy( + features = + ImmutableList.of( + ScalarFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(VARIANCE)), + minValue = 5.0, + maxValue = 10.0, + ) + ) + ) val combiner = VarianceCombiner( - AGG_PARAMS.copy(minValue = 5.0, maxValue = 10.0), + params, UNUSED_ALLOCATED_BUDGET, UNUSED_ALLOCATED_BUDGET, UNUSED_ALLOCATED_BUDGET, NoiseFactory(), ExecutionMode.PRODUCTION, + params.features[0] as ScalarFeatureSpec, ) val accumulator = - combiner.createAccumulator(privacyIdContributions { singleValueContributions += listOf(6.0) }) + combiner.createAccumulator( + privacyIdContributions { + features += featureContribution { + featureId = "value" + singleValueContributions += listOf(6.0) + } + } + ) assertThat(accumulator) .isEqualTo( varianceAccumulator { + featureId = "value" count = 1 normalizedSum = -1.5 normalizedSumSquares = (-1.5) * (-1.5) @@ -171,24 +254,43 @@ class VarianceCombinerTest { @Test fun createAccumulator_normalizationAndClamping() { + val params = + AGG_PARAMS.copy( + features = + ImmutableList.of( + ScalarFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(VARIANCE)), + minValue = 5.0, + maxValue = 10.0, + ) + ) + ) val combiner = VarianceCombiner( - AGG_PARAMS.copy(minValue = 5.0, maxValue = 10.0), + params, UNUSED_ALLOCATED_BUDGET, UNUSED_ALLOCATED_BUDGET, UNUSED_ALLOCATED_BUDGET, NoiseFactory(), ExecutionMode.PRODUCTION, + params.features[0] as ScalarFeatureSpec, ) val accumulator = combiner.createAccumulator( - privacyIdContributions { singleValueContributions += listOf(30.0) } + privacyIdContributions { + features += featureContribution { + featureId = "value" + singleValueContributions += listOf(30.0) + } + } ) assertThat(accumulator) .isEqualTo( varianceAccumulator { + featureId = "value" count = 1 normalizedSum = 2.5 normalizedSumSquares = 2.5 * 2.5 @@ -198,25 +300,44 @@ class VarianceCombinerTest { @Test fun createAccumulator_aggregatesMultipleElements() { + val params = + AGG_PARAMS.copy( + features = + ImmutableList.of( + ScalarFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(VARIANCE)), + minValue = 4.0, + maxValue = 10.0, + ) + ) + ) val combiner = VarianceCombiner( - AGG_PARAMS.copy(minValue = 4.0), + params, UNUSED_ALLOCATED_BUDGET, UNUSED_ALLOCATED_BUDGET, UNUSED_ALLOCATED_BUDGET, NoiseFactory(), ExecutionMode.PRODUCTION, + params.features[0] as ScalarFeatureSpec, ) // Create list with one value that is clamped to min value. val accumulator = combiner.createAccumulator( - privacyIdContributions { singleValueContributions += listOf(3.0, 5.5, 6.0) } + privacyIdContributions { + features += featureContribution { + featureId = "value" + singleValueContributions += listOf(3.0, 5.5, 6.0) + } + } ) assertThat(accumulator) .isEqualTo( varianceAccumulator { + featureId = "value" count = 3 normalizedSum = -5.5 // = sum of normalized singleValueContributions = -3 - 1.5 - 1 normalizedSumSquares = 12.25 // sum of each normalized value squared @@ -234,16 +355,19 @@ class VarianceCombinerTest { UNUSED_ALLOCATED_BUDGET, NoiseFactory(), ExecutionMode.PRODUCTION, + AGG_PARAMS.features[0] as ScalarFeatureSpec, ) val accumulator = combiner.mergeAccumulators( varianceAccumulator { + featureId = "value" count = 1 normalizedSum = -5.0 normalizedSumSquares = 25.0 }, varianceAccumulator { + featureId = "value" count = 10 normalizedSum = 8.5 normalizedSumSquares = 72.5 @@ -253,6 +377,7 @@ class VarianceCombinerTest { assertThat(accumulator) .isEqualTo( varianceAccumulator { + featureId = "value" count = 11 normalizedSum = 3.5 normalizedSumSquares = 97.5 @@ -271,20 +396,30 @@ class VarianceCombinerTest { val noise: Noise = mock() val noiseFactory: (NoiseKind) -> Noise = { _ -> noise } + val params = + AGG_PARAMS.copy( + nonFeatureMetrics = ImmutableList.of(), + features = + ImmutableList.of( + ScalarFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(MEAN), MetricDefinition(VARIANCE)), + minValue = 4.0, + maxValue = 10.0, + ) + ), + maxPartitionsContributed = 5, + maxContributionsPerPartition = 7, + ) val combiner = VarianceCombiner( - AGG_PARAMS.copy( - metrics = ImmutableList.of(MetricDefinition(MEAN), MetricDefinition(VARIANCE)), - maxPartitionsContributed = 5, - maxContributionsPerPartition = 7, - minValue = 4.0, - maxValue = 10.0, - ), + params, countBudget, sumBudget, sumSquaresBudget, noiseFactory, ExecutionMode.PRODUCTION, + params.features[0] as ScalarFeatureSpec, ) val accumulator = varianceAccumulator { @@ -334,27 +469,36 @@ class VarianceCombinerTest { val sumSquaresBudget = AllocatedBudget() sumSquaresBudget.initialize(10000.0, 0.0) + val params = + AGG_PARAMS.copy( + nonFeatureMetrics = ImmutableList.of(MetricDefinition(COUNT)), + features = + ImmutableList.of( + ScalarFeatureSpec( + featureId = "value", + metrics = + ImmutableList.of( + MetricDefinition(VARIANCE), + MetricDefinition(MEAN), + MetricDefinition(SUM), + ), + minValue = 4.0, + maxValue = 12.0, + ) + ), + maxPartitionsContributed = 5, + maxContributionsPerPartition = 7, + noiseKind = NoiseKind.LAPLACE, + ) val combiner = VarianceCombiner( - AGG_PARAMS.copy( - metrics = - ImmutableList.of( - MetricDefinition(VARIANCE), - MetricDefinition(MEAN), - MetricDefinition(SUM), - MetricDefinition(COUNT), - ), - maxPartitionsContributed = 5, - maxContributionsPerPartition = 7, - minValue = 4.0, - maxValue = 12.0, - noiseKind = NoiseKind.LAPLACE, - ), + params, countBudget, sumBudget, sumSquaresBudget, NoiseFactory(), ExecutionMode.PRODUCTION, + params.features[0] as ScalarFeatureSpec, ) val accumulator = varianceAccumulator { @@ -375,43 +519,44 @@ class VarianceCombinerTest { } enum class ReturnedMetricsTestCase( - val requestedMetrics: ImmutableList, + val nonFeatureMetrics: ImmutableList, + val featureMetrics: ImmutableList, val countExpected: Boolean, val sumExpected: Boolean, val meanExpected: Boolean, ) { NO_SUM_NO_COUNT_NO_MEAN( - requestedMetrics = ImmutableList.of(MetricDefinition(VARIANCE)), + nonFeatureMetrics = ImmutableList.of(), + featureMetrics = ImmutableList.of(MetricDefinition(VARIANCE)), countExpected = false, sumExpected = false, meanExpected = false, ), ONLY_SUM( - requestedMetrics = ImmutableList.of(MetricDefinition(VARIANCE), MetricDefinition(SUM)), + nonFeatureMetrics = ImmutableList.of(), + featureMetrics = ImmutableList.of(MetricDefinition(VARIANCE), MetricDefinition(SUM)), countExpected = false, sumExpected = true, meanExpected = false, ), ONLY_COUNT( - requestedMetrics = ImmutableList.of(MetricDefinition(VARIANCE), MetricDefinition(COUNT)), + nonFeatureMetrics = ImmutableList.of(MetricDefinition(COUNT)), + featureMetrics = ImmutableList.of(MetricDefinition(VARIANCE)), countExpected = true, sumExpected = false, meanExpected = false, ), ONLY_MEAN( - requestedMetrics = ImmutableList.of(MetricDefinition(VARIANCE), MetricDefinition(MEAN)), + nonFeatureMetrics = ImmutableList.of(), + featureMetrics = ImmutableList.of(MetricDefinition(VARIANCE), MetricDefinition(MEAN)), countExpected = false, sumExpected = false, meanExpected = true, ), COUNT_AND_SUM_AND_MEAN( - requestedMetrics = - ImmutableList.of( - MetricDefinition(VARIANCE), - MetricDefinition(MEAN), - MetricDefinition(SUM), - MetricDefinition(COUNT), - ), + nonFeatureMetrics = ImmutableList.of(MetricDefinition(COUNT)), + featureMetrics = + ImmutableList.of(MetricDefinition(VARIANCE), MetricDefinition(MEAN), MetricDefinition(SUM)), countExpected = true, sumExpected = true, meanExpected = true, @@ -422,17 +567,29 @@ class VarianceCombinerTest { fun aggregate_computeMetrics_checkWhichMetricReturned( @TestParameter testCase: ReturnedMetricsTestCase ) { + val features: ImmutableList = + ImmutableList.of( + ScalarFeatureSpec( + featureId = "value", + metrics = testCase.featureMetrics, + minValue = -10.0, + maxValue = 10.0, + ) + ) + val params = + AGG_PARAMS.copy(nonFeatureMetrics = testCase.nonFeatureMetrics, features = features) val combiner = VarianceCombiner( - AGG_PARAMS.copy(metrics = testCase.requestedMetrics), + params, UNUSED_ALLOCATED_BUDGET, UNUSED_ALLOCATED_BUDGET, UNUSED_ALLOCATED_BUDGET, NoiseFactory(), ExecutionMode.PRODUCTION, + params.features[0] as ScalarFeatureSpec, ) - val metrics = + val result = combiner.computeMetrics( varianceAccumulator { count = 10 @@ -441,49 +598,78 @@ class VarianceCombinerTest { } ) if (testCase.countExpected) { - assertThat(metrics.count).isNotNull() + assertThat(result.count).isNotNull() } else { - assertThat(metrics.count).isNull() + assertThat(result.count).isNull() } if (testCase.sumExpected) { - assertThat(metrics.sum).isNotNull() + assertThat(result.sum).isNotNull() } else { - assertThat(metrics.sum).isNull() + assertThat(result.sum).isNull() } if (testCase.meanExpected) { - assertThat(metrics.mean).isNotNull() + assertThat(result.mean).isNotNull() } else { - assertThat(metrics.mean).isNull() + assertThat(result.mean).isNull() } } @Test fun computeMetrics_withoutNoise_withMultipleContributionsIncludingEmptyAccumulator_returnsCorrectResult() { + val params = + AGG_PARAMS.copy( + nonFeatureMetrics = ImmutableList.of(), + features = + ImmutableList.of( + ScalarFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(VARIANCE)), + minValue = -10.0, + maxValue = 10.0, + ) + ), + ) val combiner = VarianceCombiner( - AGG_PARAMS.copy( - ImmutableList.of(MetricDefinition(VARIANCE)), - minValue = -10.0, - maxValue = 10.0, - ), + params, UNUSED_ALLOCATED_BUDGET, UNUSED_ALLOCATED_BUDGET, UNUSED_ALLOCATED_BUDGET, ZeroNoiseFactory(), ExecutionMode.PRODUCTION, + params.features[0] as ScalarFeatureSpec, ) val accumulator0 = combiner.emptyAccumulator() val accumulator1 = combiner.createAccumulator( - privacyIdContributions { singleValueContributions += listOf(10.0, -10.0) } + privacyIdContributions { + features += featureContribution { + featureId = "value" + singleValueContributions += listOf(10.0, -10.0) + } + } ) val accumulator2 = - combiner.createAccumulator(privacyIdContributions { singleValueContributions += listOf(9.0) }) + combiner.createAccumulator( + privacyIdContributions { + features += featureContribution { + featureId = "value" + singleValueContributions += listOf(9.0) + } + } + ) val accumulator3 = - combiner.createAccumulator(privacyIdContributions { singleValueContributions += listOf(0.0) }) + combiner.createAccumulator( + privacyIdContributions { + features += featureContribution { + featureId = "value" + singleValueContributions += listOf(0.0) + } + } + ) val accumulator01 = combiner.mergeAccumulators(accumulator0, accumulator1) val accumulator23 = combiner.mergeAccumulators(accumulator2, accumulator3) val finalAccumulator = combiner.mergeAccumulators(accumulator01, accumulator23) @@ -495,23 +681,33 @@ class VarianceCombinerTest { @Test fun computeMetrics_withoutNoise_onlyEmptyAccumulator_returnsZeroCountAndNaNForCountMeanAndVariance() { - val combiner = - VarianceCombiner( - AGG_PARAMS.copy( + val params = + AGG_PARAMS.copy( + nonFeatureMetrics = ImmutableList.of(MetricDefinition(COUNT)), + features = ImmutableList.of( - MetricDefinition(VARIANCE), - MetricDefinition(MEAN), - MetricDefinition(SUM), - MetricDefinition(COUNT), + ScalarFeatureSpec( + featureId = "value", + metrics = + ImmutableList.of( + MetricDefinition(VARIANCE), + MetricDefinition(MEAN), + MetricDefinition(SUM), + ), + minValue = 4.0, + maxValue = 10.0, + ) ), - minValue = 4.0, - maxValue = 10.0, - ), + ) + val combiner = + VarianceCombiner( + params, UNUSED_ALLOCATED_BUDGET, UNUSED_ALLOCATED_BUDGET, UNUSED_ALLOCATED_BUDGET, ZeroNoiseFactory(), ExecutionMode.PRODUCTION, + params.features[0] as ScalarFeatureSpec, ) val result = combiner.computeMetrics(combiner.emptyAccumulator()) @@ -530,12 +726,19 @@ class VarianceCombinerTest { companion object { private val AGG_PARAMS = AggregationParams( - metrics = ImmutableList.of(MetricDefinition(MEAN), MetricDefinition(VARIANCE)), + nonFeatureMetrics = ImmutableList.of(), + features = + ImmutableList.of( + ScalarFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(MEAN), MetricDefinition(VARIANCE)), + minValue = -10.0, + maxValue = 10.0, + ) + ), noiseKind = NoiseKind.GAUSSIAN, maxPartitionsContributed = 3, maxContributionsPerPartition = 5, - minValue = -10.0, - maxValue = 10.0, ) private val UNUSED_ALLOCATED_BUDGET = AllocatedBudget() diff --git a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/VectorSumCombinerTest.kt b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/VectorSumCombinerTest.kt index 2bc9ee42..3c0de499 100644 --- a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/VectorSumCombinerTest.kt +++ b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/VectorSumCombinerTest.kt @@ -29,6 +29,7 @@ import com.google.privacy.differentialprivacy.pipelinedp4j.core.NoiseKind.GAUSSI import com.google.privacy.differentialprivacy.pipelinedp4j.core.budget.AllocatedBudget import com.google.privacy.differentialprivacy.pipelinedp4j.dplibrary.NoiseFactory import com.google.privacy.differentialprivacy.pipelinedp4j.dplibrary.ZeroNoiseFactory +import com.google.privacy.differentialprivacy.pipelinedp4j.proto.PrivacyIdContributionsKt.featureContribution import com.google.privacy.differentialprivacy.pipelinedp4j.proto.PrivacyIdContributionsKt.multiValueContribution import com.google.privacy.differentialprivacy.pipelinedp4j.proto.privacyIdContributions import com.google.privacy.differentialprivacy.pipelinedp4j.proto.vectorSumAccumulator @@ -44,11 +45,18 @@ import org.mockito.kotlin.verifyNoMoreInteractions class VectorSumCombinerTest { private val VECTOR_SUM_AGG_PARAMS = AggregationParams( - metrics = ImmutableList.of(MetricDefinition(VECTOR_SUM)), + nonFeatureMetrics = ImmutableList.of(), + features = + ImmutableList.of( + VectorFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(VECTOR_SUM)), + vectorSize = 3, + normKind = NormKind.L_INF, + vectorMaxTotalNorm = 3.0, + ) + ), noiseKind = GAUSSIAN, - vectorNormKind = NormKind.L_INF, - vectorMaxTotalNorm = 3.0, - vectorSize = 3, maxPartitionsContributed = 5, ) @@ -60,169 +68,278 @@ class VectorSumCombinerTest { @Test fun emptyAccumulator_returnsZeroVector() { + val params = + VECTOR_SUM_AGG_PARAMS.copy( + features = + ImmutableList.of( + VectorFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(VECTOR_SUM)), + vectorSize = 3, + normKind = NormKind.L_INF, + vectorMaxTotalNorm = 3.0, + ) + ) + ) val combiner = VectorSumCombiner( - VECTOR_SUM_AGG_PARAMS.copy(vectorSize = 3), + params, UNUSED_ALLOCATED_BUDGET, NoiseFactory(), ExecutionMode.PRODUCTION, + params.features[0] as VectorFeatureSpec, ) val accumulator = combiner.emptyAccumulator() assertThat(accumulator) - .isEqualTo(vectorSumAccumulator { sumsPerDimension += listOf(0.0, 0.0, 0.0) }) + .isEqualTo( + vectorSumAccumulator { + featureId = "value" + sumsPerDimension += listOf(0.0, 0.0, 0.0) + } + ) } @Test fun createAccumulator_sumsVectors() { + val params = + VECTOR_SUM_AGG_PARAMS.copy( + features = + ImmutableList.of( + VectorFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(VECTOR_SUM)), + vectorSize = 3, + normKind = NormKind.L1, + vectorMaxTotalNorm = 300.0, + ) + ) + ) val combiner = VectorSumCombiner( - VECTOR_SUM_AGG_PARAMS.copy( - vectorNormKind = NormKind.L1, - vectorMaxTotalNorm = 300.0, - vectorSize = 3, - ), + params, UNUSED_ALLOCATED_BUDGET, NoiseFactory(), ExecutionMode.PRODUCTION, + params.features[0] as VectorFeatureSpec, ) val accumulator = combiner.createAccumulator( privacyIdContributions { - multiValueContributions += - listOf( - multiValueContribution { values += listOf(-10.0, 15.0, 0.0) }, - multiValueContribution { values += listOf(10.0, 20.0, -1.0) }, - ) + features += featureContribution { + featureId = "value" + multiValueContributions += + listOf( + multiValueContribution { values += listOf(-10.0, 15.0, 0.0) }, + multiValueContribution { values += listOf(10.0, 20.0, -1.0) }, + ) + } } ) // The vector sum is [0.0, 35.0, 1.0], which has L1 norm of 36. // The max norm is 300 > 36, so the vector is not clipped. assertThat(accumulator) - .isEqualTo(vectorSumAccumulator { sumsPerDimension += listOf(0.0, 35.0, -1.0) }) + .isEqualTo( + vectorSumAccumulator { + featureId = "value" + sumsPerDimension += listOf(0.0, 35.0, -1.0) + } + ) } @Test fun createAccumulator_perPartitionContributionBoundingEnabledLInfNorm_clampsOnlyTotalVectorSum() { + val params = + VECTOR_SUM_AGG_PARAMS.copy( + features = + ImmutableList.of( + VectorFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(VECTOR_SUM)), + vectorSize = 3, + normKind = NormKind.L_INF, + vectorMaxTotalNorm = 30.0, + ) + ), + contributionBoundingLevel = PARTITION_LEVEL, + ) val combiner = VectorSumCombiner( - VECTOR_SUM_AGG_PARAMS.copy( - vectorNormKind = NormKind.L_INF, - vectorMaxTotalNorm = 30.0, - vectorSize = 3, - contributionBoundingLevel = PARTITION_LEVEL, - ), + params, UNUSED_ALLOCATED_BUDGET, NoiseFactory(), ExecutionMode.PRODUCTION, + params.features[0] as VectorFeatureSpec, ) val accumulator = combiner.createAccumulator( privacyIdContributions { - multiValueContributions += - listOf( - multiValueContribution { values += listOf(-10.0, 75.0, 0.0) }, - multiValueContribution { values += listOf(10.0, -40.0, -1.0) }, - ) + features += featureContribution { + featureId = "value" + multiValueContributions += + listOf( + multiValueContribution { values += listOf(-10.0, 75.0, 0.0) }, + multiValueContribution { values += listOf(10.0, -40.0, -1.0) }, + ) + } } ) // The vector sum is [0.0, 35.0, 1.0], which has L_INF norm of 35. // Each component is clipped to -30, 30. assertThat(accumulator) - .isEqualTo(vectorSumAccumulator { sumsPerDimension += listOf(0.0, 30.0, -1.0) }) + .isEqualTo( + vectorSumAccumulator { + featureId = "value" + sumsPerDimension += listOf(0.0, 30.0, -1.0) + } + ) } @Test fun createAccumulator_perPartitionContributionBoundingEnabledL1Norm_clampsOnlyTotalVectorSum() { + val params = + VECTOR_SUM_AGG_PARAMS.copy( + features = + ImmutableList.of( + VectorFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(VECTOR_SUM)), + vectorSize = 2, + normKind = NormKind.L1, + vectorMaxTotalNorm = 10.0, + ) + ) + ) val combiner = VectorSumCombiner( - VECTOR_SUM_AGG_PARAMS.copy( - vectorNormKind = NormKind.L1, - vectorMaxTotalNorm = 10.0, - vectorSize = 2, - ), + params, UNUSED_ALLOCATED_BUDGET, NoiseFactory(), ExecutionMode.PRODUCTION, + params.features[0] as VectorFeatureSpec, ) val accumulator = combiner.createAccumulator( privacyIdContributions { - multiValueContributions += - listOf( - multiValueContribution { values += listOf(-4.0, 2.0) }, - multiValueContribution { values += listOf(-5.0, 1.0) }, - multiValueContribution { values += listOf(-3.0, 1.0) }, - ) + features += featureContribution { + featureId = "value" + multiValueContributions += + listOf( + multiValueContribution { values += listOf(-4.0, 2.0) }, + multiValueContribution { values += listOf(-5.0, 1.0) }, + multiValueContribution { values += listOf(-3.0, 1.0) }, + ) + } } ) assertThat(accumulator) - .isEqualTo(vectorSumAccumulator { sumsPerDimension += listOf(-7.5, 2.5) }) + .isEqualTo( + vectorSumAccumulator { + featureId = "value" + sumsPerDimension += listOf(-7.5, 2.5) + } + ) } @Test fun createAccumulator_perPartitionContributionBoundingEnabledL2Norm_clampsOnlyTotalVectorSum() { + val params = + VECTOR_SUM_AGG_PARAMS.copy( + features = + ImmutableList.of( + VectorFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(VECTOR_SUM)), + vectorSize = 2, + normKind = NormKind.L2, + vectorMaxTotalNorm = 6.5, + ) + ) + ) val combiner = VectorSumCombiner( - VECTOR_SUM_AGG_PARAMS.copy( - vectorNormKind = NormKind.L2, - vectorMaxTotalNorm = 6.5, - vectorSize = 2, - ), + params, UNUSED_ALLOCATED_BUDGET, NoiseFactory(), ExecutionMode.PRODUCTION, + params.features[0] as VectorFeatureSpec, ) val accumulator = combiner.createAccumulator( privacyIdContributions { - multiValueContributions += - listOf( - multiValueContribution { values += listOf(-10.0, 2.0) }, - multiValueContribution { values += listOf(-2.0, 3.0) }, - ) + features += featureContribution { + featureId = "value" + multiValueContributions += + listOf( + multiValueContribution { values += listOf(-10.0, 2.0) }, + multiValueContribution { values += listOf(-2.0, 3.0) }, + ) + } } ) assertThat(accumulator) - .isEqualTo(vectorSumAccumulator { sumsPerDimension += listOf(-6.0, 2.5) }) + .isEqualTo( + vectorSumAccumulator { + featureId = "value" + sumsPerDimension += listOf(-6.0, 2.5) + } + ) } @Test fun createAccumulator_fullTestMode_doesNotClampTotalSum() { + val params = + VECTOR_SUM_AGG_PARAMS.copy( + features = + ImmutableList.of( + VectorFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(VECTOR_SUM)), + vectorSize = 3, + normKind = NormKind.L_INF, + vectorMaxTotalNorm = 30.0, + ) + ) + ) val combiner = VectorSumCombiner( - VECTOR_SUM_AGG_PARAMS.copy( - vectorNormKind = NormKind.L_INF, - vectorMaxTotalNorm = 30.0, - vectorSize = 3, - ), + params, UNUSED_ALLOCATED_BUDGET, NoiseFactory(), FULL_TEST_MODE, + params.features[0] as VectorFeatureSpec, ) val accumulator = combiner.createAccumulator( privacyIdContributions { - multiValueContributions += - listOf( - multiValueContribution { values += listOf(-10.0, 15.0, 0.0) }, - multiValueContribution { values += listOf(10.0, 20.0, -1.0) }, - ) + features += featureContribution { + featureId = "value" + multiValueContributions += + listOf( + multiValueContribution { values += listOf(-10.0, 15.0, 0.0) }, + multiValueContribution { values += listOf(10.0, 20.0, -1.0) }, + ) + } } ) assertThat(accumulator) - .isEqualTo(vectorSumAccumulator { sumsPerDimension += listOf(0.0, 35.0, -1.0) }) + .isEqualTo( + vectorSumAccumulator { + featureId = "value" + sumsPerDimension += listOf(0.0, 35.0, -1.0) + } + ) } @Test @@ -233,16 +350,28 @@ class VectorSumCombinerTest { UNUSED_ALLOCATED_BUDGET, NoiseFactory(), ExecutionMode.PRODUCTION, + VECTOR_SUM_AGG_PARAMS.features[0] as VectorFeatureSpec, ) val accumulator = combiner.mergeAccumulators( - vectorSumAccumulator { sumsPerDimension += listOf(0.0, 35.0, 1.0) }, - vectorSumAccumulator { sumsPerDimension += listOf(-10.0, 0.0, 1.0) }, + vectorSumAccumulator { + featureId = "value" + sumsPerDimension += listOf(0.0, 35.0, 1.0) + }, + vectorSumAccumulator { + featureId = "value" + sumsPerDimension += listOf(-10.0, 0.0, 1.0) + }, ) assertThat(accumulator) - .isEqualTo(vectorSumAccumulator { sumsPerDimension += listOf(-10.0, 35.0, 2.0) }) + .isEqualTo( + vectorSumAccumulator { + featureId = "value" + sumsPerDimension += listOf(-10.0, 35.0, 2.0) + } + ) } @Test @@ -250,12 +379,14 @@ class VectorSumCombinerTest { fun computeMetrics_addsNoise(noiseKind: NoiseKind, delta: Double) { val allocatedBudget = AllocatedBudget() allocatedBudget.initialize(1.1, delta) + val params = VECTOR_SUM_AGG_PARAMS.copy(noiseKind = noiseKind) val combiner = VectorSumCombiner( - VECTOR_SUM_AGG_PARAMS.copy(noiseKind = noiseKind), + params, allocatedBudget, NoiseFactory(), ExecutionMode.PRODUCTION, + params.features[0] as VectorFeatureSpec, ) val result = @@ -280,17 +411,27 @@ class VectorSumCombinerTest { val noiseFactoryMock: (NoiseKind) -> Noise = { _ -> noiseMock } val allocatedBudget = AllocatedBudget() allocatedBudget.initialize(1.1, 1e-3) + val params = + VECTOR_SUM_AGG_PARAMS.copy( + features = + ImmutableList.of( + VectorFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(VECTOR_SUM)), + vectorSize = 3, + normKind = normKind, + vectorMaxTotalNorm = 30.0, + ) + ), + maxPartitionsContributed = 10, + ) val combiner = VectorSumCombiner( - VECTOR_SUM_AGG_PARAMS.copy( - vectorNormKind = normKind, - vectorMaxTotalNorm = 30.0, - vectorSize = 3, - maxPartitionsContributed = 10, - ), + params, allocatedBudget, noiseFactoryMock, ExecutionMode.PRODUCTION, + params.features[0] as VectorFeatureSpec, ) val unused = @@ -336,17 +477,27 @@ class VectorSumCombinerTest { val noiseFactoryMock: (NoiseKind) -> Noise = { _ -> noiseMock } val allocatedBudget = AllocatedBudget() allocatedBudget.initialize(1.1, 1e-3) + val params = + VECTOR_SUM_AGG_PARAMS.copy( + features = + ImmutableList.of( + VectorFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(VECTOR_SUM)), + vectorSize = 4, + normKind = normKind, + vectorMaxTotalNorm = 30.0, + ) + ), + maxPartitionsContributed = 100, + ) val combiner = VectorSumCombiner( - VECTOR_SUM_AGG_PARAMS.copy( - vectorNormKind = normKind, - vectorMaxTotalNorm = 30.0, - vectorSize = 4, - maxPartitionsContributed = 100, - ), + params, allocatedBudget, noiseFactoryMock, ExecutionMode.PRODUCTION, + params.features[0] as VectorFeatureSpec, ) val unused = @@ -389,35 +540,50 @@ class VectorSumCombinerTest { fun computeMetrics_withoutNoise_withMultipleContributionsIncludingEmptyAccumulator_returnsCorrectResult() { val allocatedBudget = AllocatedBudget() allocatedBudget.initialize(1.1, 1e-5) + val params = + VECTOR_SUM_AGG_PARAMS.copy( + features = + ImmutableList.of( + VectorFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(VECTOR_SUM)), + vectorSize = 3, + normKind = NormKind.L2, + vectorMaxTotalNorm = 30.0, + ) + ), + maxPartitionsContributed = 10, + ) val combiner = VectorSumCombiner( - VECTOR_SUM_AGG_PARAMS.copy( - vectorNormKind = NormKind.L2, - vectorMaxTotalNorm = 30.0, - vectorSize = 3, - maxPartitionsContributed = 10, - ), + params, allocatedBudget, ZeroNoiseFactory(), ExecutionMode.PRODUCTION, + params.features[0] as VectorFeatureSpec, ) val accumulator0 = combiner.emptyAccumulator() val accumulator1 = combiner.createAccumulator( privacyIdContributions { - multiValueContributions += - listOf( - multiValueContribution { values += listOf(-10.0, 15.0, 1.0) }, - multiValueContribution { values += listOf(10.0, 20.0, -1.0) }, - ) + features += featureContribution { + featureId = "value" + multiValueContributions += + listOf( + multiValueContribution { values += listOf(-10.0, 15.0, 1.0) }, + multiValueContribution { values += listOf(10.0, 20.0, -1.0) }, + ) + } } ) val accumulator2 = combiner.createAccumulator( privacyIdContributions { - multiValueContributions += - listOf(multiValueContribution { values += listOf(3.0, 0.0, 4.0) }) + features += featureContribution { + featureId = "value" + multiValueContributions.add(multiValueContribution { values += listOf(3.0, 0.0, 4.0) }) + } } ) val accumulator3 = combiner.mergeAccumulators(accumulator0, accumulator1) @@ -434,12 +600,26 @@ class VectorSumCombinerTest { fun computeMetrics_withoutNoiseAndEmptyAccumulator_returnsZeroVectorSum() { val allocatedBudget = AllocatedBudget() allocatedBudget.initialize(1.1, 1e-5) + val params = + VECTOR_SUM_AGG_PARAMS.copy( + features = + ImmutableList.of( + VectorFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(VECTOR_SUM)), + vectorSize = 3, + normKind = NormKind.L_INF, + vectorMaxTotalNorm = 3.0, + ) + ) + ) val combiner = VectorSumCombiner( - VECTOR_SUM_AGG_PARAMS.copy(vectorSize = 3), + params, allocatedBudget, ZeroNoiseFactory(), ExecutionMode.PRODUCTION, + params.features[0] as VectorFeatureSpec, ) val result = combiner.computeMetrics(combiner.emptyAccumulator()) diff --git a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/spark/SparkEncodersTest.kt b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/spark/SparkEncodersTest.kt index 974c89a8..2047788a 100644 --- a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/spark/SparkEncodersTest.kt +++ b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/spark/SparkEncodersTest.kt @@ -81,12 +81,17 @@ class SparkEncodersTest { val input = listOf( compoundAccumulator { - sumAccumulator = sumAccumulator { sum = -123.0 } - meanAccumulator = meanAccumulator { + sumAccumulators += sumAccumulator { + featureId = "test_feature" + sum = -123.0 + } + meanAccumulators += meanAccumulator { + featureId = "test_feature" count = 12 normalizedSum = -1.543 } - quantilesAccumulator = quantilesAccumulator { + quantilesAccumulators += quantilesAccumulator { + featureId = "test_feature" serializedQuantilesSummary = ByteString.copyFrom(byteArrayOf(0x48, 0x65, 0x6c, 0x6c, 0x6f)) }