diff --git a/pipelinedp4j/main/com/google/privacy/differentialprivacy/pipelinedp4j/api/AggregationSpec.kt b/pipelinedp4j/main/com/google/privacy/differentialprivacy/pipelinedp4j/api/AggregationSpec.kt index 441b091f..6974f41e 100644 --- a/pipelinedp4j/main/com/google/privacy/differentialprivacy/pipelinedp4j/api/AggregationSpec.kt +++ b/pipelinedp4j/main/com/google/privacy/differentialprivacy/pipelinedp4j/api/AggregationSpec.kt @@ -148,58 +148,20 @@ internal fun AggregationSpec.getFeatureId(): String { } } -internal fun List.metrics(): List = buildList { - for (aggregation in this@metrics) { - when (aggregation) { - // Count and PrivacyIdCount do not aggregate any specific value, therefore they are handled - // differently. - is PrivacyIdCount -> - add( - MetricDefinition( - MetricType.PRIVACY_ID_COUNT, - aggregation.budget?.toInternalBudgetPerOpSpec(), - ) - ) - is Count -> - add(MetricDefinition(MetricType.COUNT, aggregation.budget?.toInternalBudgetPerOpSpec())) - is ValueAggregations<*> -> { - for (valueAggregationSpec in aggregation.valueAggregationSpecs) { - add( - MetricDefinition( - valueAggregationSpec.metricType, - valueAggregationSpec.budget?.toInternalBudgetPerOpSpec(), - ) - ) - } - } - is VectorAggregations<*> -> { - for (vectorAggregationSpec in aggregation.vectorAggregationSpecs) { - add( - MetricDefinition( - vectorAggregationSpec.metricType, - vectorAggregationSpec.budget?.toInternalBudgetPerOpSpec(), - ) - ) - } - } - } - } -} - internal fun List.outputColumnNamesWithMetricTypes(): List> = buildList { for (aggregation in this@outputColumnNamesWithMetricTypes) { when (aggregation) { - is PrivacyIdCount -> add(aggregation.outputColumnName to MetricType.PRIVACY_ID_COUNT) - is Count -> add(aggregation.outputColumnName to MetricType.COUNT) + is PrivacyIdCount -> add(Pair(aggregation.outputColumnName, MetricType.PRIVACY_ID_COUNT)) + is Count -> add(Pair(aggregation.outputColumnName, MetricType.COUNT)) is ValueAggregations<*> -> { for (valueAggregationSpec in aggregation.valueAggregationSpecs) { - add(valueAggregationSpec.outputColumnName to valueAggregationSpec.metricType) + add(Pair(valueAggregationSpec.outputColumnName, valueAggregationSpec.metricType)) } } is VectorAggregations<*> -> { for (vectorAggregationSpec in aggregation.vectorAggregationSpecs) { - add(vectorAggregationSpec.outputColumnName to vectorAggregationSpec.metricType) + add(Pair(vectorAggregationSpec.outputColumnName, vectorAggregationSpec.metricType)) } } } @@ -227,3 +189,22 @@ internal fun List.outputColumnNameToFeatureIdMap(): Map.outputColumnNames(): List = outputColumnNamesWithMetricTypes().map { it.first } + +internal fun AggregationSpec.toNonFeatureMetricDefinition(): MetricDefinition { + val (metricType, budget) = + when (this) { + is Count -> Pair(MetricType.COUNT, this.budget) + is PrivacyIdCount -> Pair(MetricType.PRIVACY_ID_COUNT, this.budget) + else -> + throw IllegalArgumentException("Unsupported AggregationSpec type for non feature metrics") + } + return MetricDefinition(metricType, budget?.toInternalBudgetPerOpSpec()) +} + +internal fun ValueAggregationSpec.toMetricDefinition(): MetricDefinition { + return MetricDefinition(this.metricType, this.budget?.toInternalBudgetPerOpSpec()) +} + +internal fun VectorAggregationSpec.toMetricDefinition(): MetricDefinition { + return MetricDefinition(this.metricType, this.budget?.toInternalBudgetPerOpSpec()) +} diff --git a/pipelinedp4j/main/com/google/privacy/differentialprivacy/pipelinedp4j/api/Query.kt b/pipelinedp4j/main/com/google/privacy/differentialprivacy/pipelinedp4j/api/Query.kt index 16dde3d5..d0cc3adf 100644 --- a/pipelinedp4j/main/com/google/privacy/differentialprivacy/pipelinedp4j/api/Query.kt +++ b/pipelinedp4j/main/com/google/privacy/differentialprivacy/pipelinedp4j/api/Query.kt @@ -23,11 +23,14 @@ import com.google.privacy.differentialprivacy.pipelinedp4j.core.DpEngine import com.google.privacy.differentialprivacy.pipelinedp4j.core.DpEngineBudgetSpec import com.google.privacy.differentialprivacy.pipelinedp4j.core.Encoder import com.google.privacy.differentialprivacy.pipelinedp4j.core.EncoderFactory +import com.google.privacy.differentialprivacy.pipelinedp4j.core.FeatureSpec import com.google.privacy.differentialprivacy.pipelinedp4j.core.FeatureValuesExtractor import com.google.privacy.differentialprivacy.pipelinedp4j.core.FrameworkCollection import com.google.privacy.differentialprivacy.pipelinedp4j.core.FrameworkTable import com.google.privacy.differentialprivacy.pipelinedp4j.core.MetricType +import com.google.privacy.differentialprivacy.pipelinedp4j.core.ScalarFeatureSpec import com.google.privacy.differentialprivacy.pipelinedp4j.core.SelectPartitionsParams +import com.google.privacy.differentialprivacy.pipelinedp4j.core.VectorFeatureSpec import com.google.privacy.differentialprivacy.pipelinedp4j.proto.DpAggregates import com.google.privacy.differentialprivacy.pipelinedp4j.proto.PerFeature import com.google.privacy.differentialprivacy.pipelinedp4j.proto.copy @@ -494,22 +497,53 @@ protected constructor( valueAggregations: ValueAggregations<*>?, vectorAggregations: VectorAggregations<*>?, ): AggregationParams { - val valueContributionBounds = valueAggregations?.contributionBounds - val vectorContributionBounds = vectorAggregations?.vectorContributionBounds + val nonFeatureMetrics = + aggregationSpecs + .filter { it is Count || it is PrivacyIdCount } + .map { it.toNonFeatureMetricDefinition() } + val features = + buildList { + if (valueAggregations != null) { + val valueContributionBounds = valueAggregations.contributionBounds + add( + ScalarFeatureSpec( + featureId = valueAggregations.getFeatureId(), + metrics = + valueAggregations.valueAggregationSpecs + .map { it.toMetricDefinition() } + .toImmutableList(), + minValue = valueContributionBounds.valueBounds?.minValue, + maxValue = valueContributionBounds.valueBounds?.maxValue, + minTotalValue = valueContributionBounds.totalValueBounds?.minValue, + maxTotalValue = valueContributionBounds.totalValueBounds?.maxValue, + ) + ) + } + if (vectorAggregations != null) { + val vectorContributionBounds = vectorAggregations.vectorContributionBounds + add( + VectorFeatureSpec( + featureId = vectorAggregations.getFeatureId(), + metrics = + vectorAggregations.vectorAggregationSpecs + .map { it.toMetricDefinition() } + .toImmutableList(), + vectorSize = vectorAggregations.vectorSize, + normKind = vectorContributionBounds.maxVectorTotalNorm.normKind.toInternalNormKind(), + vectorMaxTotalNorm = vectorContributionBounds.maxVectorTotalNorm.value, + ) + ) + } + } + return AggregationParams( - metrics = ImmutableList.copyOf(aggregationSpecs.metrics()), + nonFeatureMetrics = nonFeatureMetrics.toImmutableList(), + features = features.toImmutableList(), noiseKind = checkNotNull(noiseKind) { "noiseKind cannot be null if there are aggregations." } .toInternalNoiseKind(), maxPartitionsContributed = contributionBoundingLevel.getMaxPartitionsContributed(), maxContributionsPerPartition = contributionBoundingLevel.getMaxContributionsPerPartition(), - minValue = valueContributionBounds?.valueBounds?.minValue, - maxValue = valueContributionBounds?.valueBounds?.maxValue, - minTotalValue = valueContributionBounds?.totalValueBounds?.minValue, - maxTotalValue = valueContributionBounds?.totalValueBounds?.maxValue, - vectorNormKind = vectorContributionBounds?.maxVectorTotalNorm?.normKind?.toInternalNormKind(), - vectorMaxTotalNorm = vectorContributionBounds?.maxVectorTotalNorm?.value, - vectorSize = vectorAggregations?.vectorSize, partitionSelectionBudget = groupsType.getBudget()?.toInternalBudgetPerOpSpec(), preThreshold = groupsType.getPreThreshold(), contributionBoundingLevel = contributionBoundingLevel.toInternalContributionBoundingLevel(), @@ -534,3 +568,5 @@ protected constructor( } } } + +private fun Iterable.toImmutableList(): ImmutableList = ImmutableList.copyOf(this) diff --git a/pipelinedp4j/main/com/google/privacy/differentialprivacy/pipelinedp4j/core/Combiners.kt b/pipelinedp4j/main/com/google/privacy/differentialprivacy/pipelinedp4j/core/Combiners.kt index 790f1b95..a739be2b 100644 --- a/pipelinedp4j/main/com/google/privacy/differentialprivacy/pipelinedp4j/core/Combiners.kt +++ b/pipelinedp4j/main/com/google/privacy/differentialprivacy/pipelinedp4j/core/Combiners.kt @@ -390,7 +390,17 @@ class SumCombiner( private val budget: AllocatedBudget, private val noiseFactory: (NoiseKind) -> Noise, private val executionMode: ExecutionMode, + private val featureSpec: ScalarFeatureSpec, ) : Combiner, Serializable { + private val featureMinTotalValue = + checkNotNull(featureSpec.minTotalValue) { + "minTotalValue should be set when requesting SUM metric for feature ${featureSpec.featureId}" + } + private val featureMaxTotalValue = + checkNotNull(featureSpec.maxTotalValue) { + "maxTotalValue should be set when requesting SUM metric for feature ${featureSpec.featureId}" + } + override val requiresPerPartitionBoundedInput = false /** @@ -412,8 +422,8 @@ class SumCombiner( contributions.singleValueContributionsList .sum() .coerceInIfContributionBoundingEnabled( - aggregationParams.minTotalValue!!, - aggregationParams.maxTotalValue!!, + featureMinTotalValue, + featureMaxTotalValue, aggregationParams, executionMode, ) @@ -440,8 +450,7 @@ class SumCombiner( */ override fun computeMetrics(accumulator: SumAccumulator): Double { val noise = noiseFactory(aggregationParams.noiseKind) - val lInfSensitivity = - max(abs(aggregationParams.minTotalValue!!), abs(aggregationParams.maxTotalValue!!)) + val lInfSensitivity = max(abs(featureMinTotalValue), abs(featureMaxTotalValue)) return noise.addNoise( accumulator.sum, @@ -468,7 +477,12 @@ class VectorSumCombiner( private val budget: AllocatedBudget, private val noiseFactory: (NoiseKind) -> Noise, private val executionMode: ExecutionMode, + private val featureSpec: VectorFeatureSpec, ) : Combiner>, Serializable { + private val normKind = featureSpec.normKind + private val vectorMaxTotalNorm = featureSpec.vectorMaxTotalNorm + private val vectorSize = featureSpec.vectorSize + override val requiresPerPartitionBoundedInput = false /** @@ -486,12 +500,9 @@ class VectorSumCombiner( contributions.multiValueContributionsList .map { contribution -> ArrayRealVector(contribution.valuesList.toDoubleArray()) } .reduceOrNull { acc, vector -> acc.add(vector) } - ?.clipIfContributionBoundingEnabled( - aggregationParams.vectorMaxTotalNorm!!, - aggregationParams.vectorNormKind!!, - ) + ?.clipIfContributionBoundingEnabled(vectorMaxTotalNorm, normKind) ?.toArray() - ?.asList() ?: List(aggregationParams.vectorSize!!) { 0.0 } + ?.asList() ?: List(vectorSize) { 0.0 } } /** @@ -525,9 +536,9 @@ class VectorSumCombiner( is LaplaceNoise -> { val l1Sensitivity = calculateL1Sensistivity( - aggregationParams.vectorNormKind!!, - aggregationParams.vectorMaxTotalNorm!!, - aggregationParams.vectorSize!!, + normKind, + vectorMaxTotalNorm, + vectorSize, aggregationParams.maxPartitionsContributed!!, ) vector.map { noise.addNoise(it, l1Sensitivity, budget.epsilon(), budget.delta()) } @@ -535,9 +546,9 @@ class VectorSumCombiner( is GaussianNoise -> { val l2Sensitivity = calculateL2Sensistivity( - aggregationParams.vectorNormKind!!, - aggregationParams.vectorMaxTotalNorm!!, - aggregationParams.vectorSize!!, + normKind, + vectorMaxTotalNorm, + vectorSize, aggregationParams.maxPartitionsContributed!!, ) vector.map { noise.addNoise(it, l2Sensitivity, budget.epsilon(), budget.delta()) } @@ -631,10 +642,19 @@ class MeanCombiner( private val sumBudget: AllocatedBudget, private val noiseFactory: (NoiseKind) -> Noise, private val executionMode: ExecutionMode, + private val featureSpec: ScalarFeatureSpec, ) : Combiner, Serializable { - private val midValue = (aggregationParams.minValue!! + aggregationParams.maxValue!!) / 2 - private val returnCount = aggregationParams.metrics.any { it.type == COUNT } - private val returnSum = aggregationParams.metrics.any { it.type == SUM } + private val featureMinValue = + checkNotNull(featureSpec.minValue) { + "minValue should be set when requesting mean metric for feature ${featureSpec.featureId}" + } + private val featureMaxValue = + checkNotNull(featureSpec.maxValue) { + "maxValue should be set when requesting mean metric for feature ${featureSpec.featureId}" + } + private val midValue = (featureMinValue + featureMaxValue) / 2 + private val returnCount = aggregationParams.nonFeatureMetrics.any { it.type == COUNT } + private val returnSum = featureSpec.metrics.any { it.type == SUM } override val requiresPerPartitionBoundedInput = true @@ -657,8 +677,8 @@ class MeanCombiner( contributions.singleValueContributionsList .map { it.coerceInIfContributionBoundingEnabled( - aggregationParams.minValue!!, - aggregationParams.maxValue!!, + featureMinValue, + featureMaxValue, aggregationParams, executionMode, ) - midValue @@ -692,6 +712,7 @@ class MeanCombiner( val dpNormalizedSum = getNoisedNormalizedSum( accumulator.normalizedSum, + featureMaxValue, midValue, aggregationParams, sumBudget, @@ -734,7 +755,17 @@ class QuantilesCombiner( private val budget: AllocatedBudget, private val noiseFactory: (NoiseKind) -> Noise, private val executionMode: ExecutionMode, + private val featureSpec: ScalarFeatureSpec, ) : Combiner>, Serializable { + private val featureMinValue = + checkNotNull(featureSpec.minValue) { + "minValue should be set when requesting quantiles metric for feature ${featureSpec.featureId}" + } + private val featureMaxValue = + checkNotNull(featureSpec.maxValue) { + "maxValue should be set when requesting quantiles metric for feature ${featureSpec.featureId}" + } + override val requiresPerPartitionBoundedInput = true /** @@ -820,8 +851,8 @@ class QuantilesCombiner( ) // Min and max values aren't changed if there is no contribution bounding because the extreme // values aren't supported by the DP library. - .lower(aggregationParams.minValue!!) - .upper(aggregationParams.maxValue!!) + .lower(featureMinValue) + .upper(featureMaxValue) .build() } @@ -846,11 +877,20 @@ class VarianceCombiner( private val sumSquaresBudget: AllocatedBudget, private val noiseFactory: (NoiseKind) -> Noise, private val executionMode: ExecutionMode, + private val featureSpec: ScalarFeatureSpec, ) : Combiner, Serializable { - private val midValue = (aggregationParams.minValue!! + aggregationParams.maxValue!!) / 2 - private val returnCount = aggregationParams.metrics.any { it.type == COUNT } - private val returnSum = aggregationParams.metrics.any { it.type == SUM } - private val returnMean = aggregationParams.metrics.any { it.type == MEAN } + private val featureMinValue = + checkNotNull(featureSpec.minValue) { + "minValue should be set when requesting variance metrics for feature ${featureSpec.featureId}" + } + private val featureMaxValue = + checkNotNull(featureSpec.maxValue) { + "maxValue should be set when requesting variance metrics for feature ${featureSpec.featureId}" + } + private val midValue = (featureMinValue + featureMaxValue) / 2 + private val returnCount = aggregationParams.nonFeatureMetrics.any { it.type == COUNT } + private val returnSum = featureSpec.metrics.any { it.type == SUM } + private val returnMean = featureSpec.metrics.any { it.type == MEAN } override val requiresPerPartitionBoundedInput = true @@ -872,8 +912,8 @@ class VarianceCombiner( val coercedValues = contributions.singleValueContributionsList.map { it.coerceInIfContributionBoundingEnabled( - aggregationParams.minValue!!, - aggregationParams.maxValue!!, + featureMinValue, + featureMaxValue, aggregationParams, executionMode, ) - midValue @@ -913,6 +953,7 @@ class VarianceCombiner( val dpNormalizedSum = getNoisedNormalizedSum( accumulator.normalizedSum, + featureMaxValue, midValue, aggregationParams, sumBudget, @@ -921,6 +962,7 @@ class VarianceCombiner( val dpNormalizedSumSquares = getNoisedNormalizedSumOfSquares( accumulator.normalizedSumSquares, + featureMaxValue, midValue, aggregationParams, sumSquaresBudget, @@ -1173,6 +1215,7 @@ private fun getNoisedCount( private fun getNoisedNormalizedSum( normalizedSum: Double, + featureMaxValue: Double, midValue: Double, aggregationParams: AggregationParams, sumBudget: AllocatedBudget, @@ -1182,7 +1225,7 @@ private fun getNoisedNormalizedSum( // All values were normalized to the symmetric range [minValue-midValue, maxValue-midValue]. // So the linf sensitivity of 1 record is (maxValue-midValue). val lInfSensitivity = - (aggregationParams.maxValue!! - midValue) * aggregationParams.maxContributionsPerPartition!! + (featureMaxValue - midValue) * aggregationParams.maxContributionsPerPartition!! return noise.addNoise( normalizedSum, aggregationParams.maxPartitionsContributed!!, @@ -1194,6 +1237,7 @@ private fun getNoisedNormalizedSum( private fun getNoisedNormalizedSumOfSquares( normalizedSumOfSquares: Double, + featureMaxValue: Double, midValue: Double, aggregationParams: AggregationParams, sumOfSquaresBudget: AllocatedBudget, @@ -1204,7 +1248,7 @@ private fun getNoisedNormalizedSumOfSquares( // were then squared and summed up. // So the linf sensitivity of 1 record is (maxValue-midValue)^2 distributed across allowed // partition contributions. - val distance = aggregationParams.maxValue!! - midValue + val distance = featureMaxValue - midValue val lInfSensitivity = distance * distance * aggregationParams.maxContributionsPerPartition!! return noise.addNoise( normalizedSumOfSquares, diff --git a/pipelinedp4j/main/com/google/privacy/differentialprivacy/pipelinedp4j/core/ContributionSampler.kt b/pipelinedp4j/main/com/google/privacy/differentialprivacy/pipelinedp4j/core/ContributionSampler.kt index 0d5b07b9..228da530 100644 --- a/pipelinedp4j/main/com/google/privacy/differentialprivacy/pipelinedp4j/core/ContributionSampler.kt +++ b/pipelinedp4j/main/com/google/privacy/differentialprivacy/pipelinedp4j/core/ContributionSampler.kt @@ -17,8 +17,6 @@ package com.google.privacy.differentialprivacy.pipelinedp4j.core import com.google.privacy.differentialprivacy.pipelinedp4j.proto.PrivacyIdContributions -import com.google.privacy.differentialprivacy.pipelinedp4j.proto.PrivacyIdContributionsKt.multiValueContribution -import com.google.privacy.differentialprivacy.pipelinedp4j.proto.privacyIdContributions /** Bounds contributions to the entire non-aggregated data collection. */ sealed interface ContributionSampler { @@ -34,45 +32,6 @@ sealed interface ContributionSampler { ): FrameworkTable } -/** - * Samples contributions to [maxPartitionsContributed] partitions among the given [contributions] - * assuming that they all belong to the same [PrivacyId]. - */ -internal fun samplePartitions( - contributions: Iterable>, - maxPartitionsContributed: Int, -): Collection> { - val allPartitions = contributions.map { it.partitionKey() }.toSet() - val keptPartitions = sampleNElements(allPartitions, maxPartitionsContributed).toSet() - return contributions.filter { it.partitionKey() in keptPartitions } -} - -/** - * Samples [maxContributionsPerPartition] contributions among the given [partitionContributions] - * assuming that they all belong to the same [PrivacyId] and [PartitionKey]. Combines the result - * into a [PrivacyIdContributions] and returns it. - */ -internal fun sampleContributionsPerPartition( - partitionContributions: Iterable>, - maxContributionsPerPartition: Int, -): PrivacyIdContributions { - val sampledContributions = - sampleNElements(partitionContributions.toList(), maxContributionsPerPartition) - return privacyIdContributions { - for (contribution in sampledContributions) { - // TODO: Update to add support for multiple features. - // We expect that contribution contains only one feature with featureId="", - // produced by DataExtractors. - val perFeatureValues = contribution.perFeatureValues().single() - if (perFeatureValues.values.size == 1) { - singleValueContributions += perFeatureValues.values - } else { - multiValueContributions += multiValueContribution { values += perFeatureValues.values } - } - } - } -} - private fun sampleNElements(elements: Collection, N: Int): Collection { if (elements.size <= N) return elements return elements.shuffled().take(N) diff --git a/pipelinedp4j/main/com/google/privacy/differentialprivacy/pipelinedp4j/core/DpEngine.kt b/pipelinedp4j/main/com/google/privacy/differentialprivacy/pipelinedp4j/core/DpEngine.kt index 086a391d..5199ebae 100644 --- a/pipelinedp4j/main/com/google/privacy/differentialprivacy/pipelinedp4j/core/DpEngine.kt +++ b/pipelinedp4j/main/com/google/privacy/differentialprivacy/pipelinedp4j/core/DpEngine.kt @@ -279,10 +279,14 @@ internal constructor( noiseFactory: (NoiseKind) -> Noise, executionMode: ExecutionMode, ): CompoundCombiner { - val meanInMetrics = params.metrics.any { it.type == MEAN } - val varianceInMetrics = params.metrics.any { it.type == VARIANCE } + require(params.features.size <= 1) { "Multi-feature aggregations are not supported yet" } + val featureSpec = params.features.singleOrNull() + val allMetrics = + (featureSpec?.metrics ?: emptyList()) + params.nonFeatureMetrics + val meanInMetrics = allMetrics.any { it.type == MEAN } + val varianceInMetrics = allMetrics.any { it.type == VARIANCE } val metricCombiners = - params.metrics + allMetrics .mapNotNull { metric -> when (metric.type) { PRIVACY_ID_COUNT -> { @@ -319,7 +323,13 @@ internal constructor( } SUM -> { if (!meanInMetrics && !varianceInMetrics) { - SumCombiner(params, getBudgetForMetric(metric, params), noiseFactory, executionMode) + SumCombiner( + params, + getBudgetForMetric(metric, params), + noiseFactory, + executionMode, + featureSpec as ScalarFeatureSpec, + ) } else { null } @@ -330,12 +340,20 @@ internal constructor( getBudgetForMetric(metric, params), noiseFactory, executionMode, + featureSpec as VectorFeatureSpec, ) } MEAN -> { if (!varianceInMetrics) { val (countBudget, sumBudget) = calculateCountSumBudgetsForMean(params) - MeanCombiner(params, countBudget, sumBudget, noiseFactory, executionMode) + MeanCombiner( + params, + countBudget, + sumBudget, + noiseFactory, + executionMode, + featureSpec as ScalarFeatureSpec, + ) } else { null } @@ -349,6 +367,7 @@ internal constructor( sumSquaresBudget, noiseFactory, executionMode, + featureSpec as ScalarFeatureSpec, ) } @@ -359,12 +378,13 @@ internal constructor( getBudgetForMetric(metric, params), noiseFactory, executionMode, + featureSpec as ScalarFeatureSpec, ) } } } .toMutableList() - if (!usePublicPartitions && !params.metrics.any { it.type == PRIVACY_ID_COUNT }) { + if (!usePublicPartitions && !allMetrics.any { it.type == PRIVACY_ID_COUNT }) { // For private partitions, we need to compute the privacy ID count, even if PRIVACY_ID_COUNT // is not requested in metrics. metricCombiners.add(ExactPrivacyIdCountCombiner()) @@ -436,10 +456,13 @@ internal constructor( private fun calculateCountSumBudgetsForMean( params: AggregationParams ): Pair { - fun getMetricDefinition(metricType: MetricType) = params.metrics.find { it.type == metricType } + fun getMetricDefinitionFromFeature(metricType: MetricType) = + params.features.single().metrics.find { it.type == metricType } + fun getMetricDefinitionFromNonFeature(metricType: MetricType) = + params.nonFeatureMetrics.find { it.type == metricType } // meanDefinition is not null, because this function is called only when MEAN is in metrics. - val meanDefinition = getMetricDefinition(MEAN)!! + val meanDefinition = getMetricDefinitionFromFeature(MEAN)!! // Budget spec for COUNT. val countBudgetSpec: BudgetPerOpSpec = @@ -448,7 +471,8 @@ internal constructor( meanDefinition.budgetSpec!!.times(0.5) } else { // Or COUNT spec or the default budget spec. - getMetricDefinition(COUNT)?.budgetSpec ?: RelativeBudgetPerOpSpec(weight = 1.0) + getMetricDefinitionFromNonFeature(COUNT)?.budgetSpec + ?: RelativeBudgetPerOpSpec(weight = 1.0) } // Budget spec for SUM. @@ -458,7 +482,7 @@ internal constructor( meanDefinition.budgetSpec!!.times(0.5) } else { // Or SUM spec or the default budget spec. - getMetricDefinition(SUM)?.budgetSpec ?: RelativeBudgetPerOpSpec(weight = 1.0) + getMetricDefinitionFromFeature(SUM)?.budgetSpec ?: RelativeBudgetPerOpSpec(weight = 1.0) } return budgetAccountant.requestBudget( @@ -473,7 +497,7 @@ internal constructor( params: AggregationParams ): Triple { // Variance is not null because this function is called only when it is in metrics. - val varianceDefinition = params.metrics.find { it.type == VARIANCE }!! + val varianceDefinition = params.features.single().metrics.find { it.type == VARIANCE }!! // Budget is split equally between COUNT, SUM and SUM_SQUARES. val budgetSplit = 1.0 / 3.0 // If varianceDefinition.budgetSpec is null, the default budget spec is used. @@ -512,5 +536,5 @@ private fun usePostAggregationPartitionSelection( executionMode: ExecutionMode, ): Boolean = !usePublicPartitions && - params.metrics.any { it.type == PRIVACY_ID_COUNT } && + params.nonFeatureMetrics.any { it.type == PRIVACY_ID_COUNT } && executionMode.partitionSelectionIsNonDeterministic diff --git a/pipelinedp4j/main/com/google/privacy/differentialprivacy/pipelinedp4j/core/DpFunctionsParams.kt b/pipelinedp4j/main/com/google/privacy/differentialprivacy/pipelinedp4j/core/DpFunctionsParams.kt index dbb20caa..7ba21d42 100644 --- a/pipelinedp4j/main/com/google/privacy/differentialprivacy/pipelinedp4j/core/DpFunctionsParams.kt +++ b/pipelinedp4j/main/com/google/privacy/differentialprivacy/pipelinedp4j/core/DpFunctionsParams.kt @@ -117,7 +117,8 @@ private fun partitionsContributedBoundingShouldBeApplied( @Immutable data class AggregationParams( /** The metrics being anonymized. */ - val metrics: ImmutableList, + val nonFeatureMetrics: ImmutableList, + val features: ImmutableList = ImmutableList.of(), val noiseKind: NoiseKind, /** * The maximum number of partitions that can be contributed by a privacy unit. Used by all @@ -134,37 +135,6 @@ data class AggregationParams( * Note this is mutually exclusive with maxContributionsPerPartition. */ val maxContributions: Int? = null, - /** - * The minimum bound on the individual value that can be contributed by a user to a partition. - * Used for MEAN and QUANTILES. - */ - val minValue: Double? = null, - /** - * The maximum bound on the individual value that can be contributed by a user to a partition. - * Used for MEAN and QUANTILES. - */ - val maxValue: Double? = null, - /** - * The minimum bound on the sum of the values that can be contributed by a user to a partition. - * Used for SUM. - */ - val minTotalValue: Double? = null, - /** - * The maximum bound on the sum of the values that can be contributed by a user to a partition. - * Used for SUM. - */ - val maxTotalValue: Double? = null, - /** The type of norm. Used for VECTOR_SUM. */ - val vectorNormKind: NormKind? = null, - /** - * The maximum norm of the sum of the vectors that can be contributed by a user to a partition. - * Used for VECTOR_SUM. - */ - val vectorMaxTotalNorm: Double? = null, - /** - * The size of the vectors that can be contributed by a user to a partition. Used for VECTOR_SUM. - */ - val vectorSize: Int? = null, /** * The amount of budget used for partition selection. * @@ -216,26 +186,44 @@ fun validateAggregationParams( } } - // Metrics. - require(!params.metrics.isEmpty()) { "metrics must not be empty." } - require(params.metrics.map { it.type }.distinct().size == params.metrics.size) { - "metrics must not contain duplicate metric types. Provided ${params.metrics.map { it.type }}." + // Metrics & features validation. + require(params.nonFeatureMetrics.isNotEmpty() || params.features.isNotEmpty()) { + "At least one of nonFeatureMetrics or features must be specified." + } + require( + params.nonFeatureMetrics.all { + it.type == MetricType.COUNT || it.type == MetricType.PRIVACY_ID_COUNT + } + ) { + "Only COUNT and PRIVACY_ID_COUNT are allowed in AggregationParams.nonFeatureMetrics. Other metrics should be provided via AggregationParams.features." + } + val featureMetrics = params.features.flatMap { it.metrics } + require( + featureMetrics.none { it.type == MetricType.COUNT || it.type == MetricType.PRIVACY_ID_COUNT } + ) { + "COUNT and PRIVACY_ID_COUNT are not allowed in features. They should be provided via AggregationParams.nonFeatureMetrics." + } + + require( + params.nonFeatureMetrics.map { it.type }.distinct().size == params.nonFeatureMetrics.size + ) { + "nonFeatureMetrics must not contain duplicate metric types. Provided ${params.nonFeatureMetrics.map { it.type }}." } + for (feature in params.features) { + require(feature.metrics.map { it.type }.distinct().size == feature.metrics.size) { + "feature ${feature.featureId} must not contain duplicate metric types. Provided ${feature.metrics.map { it.type }}" + } + } + require(params.features.map { it.featureId }.distinct().size == params.features.size) { + "featureId must be unique. Provided ${params.features.map { it.featureId }}" + } + // Max contributions per partition. require(isGreaterThanZeroIfSet(params.maxContributionsPerPartition)) { "maxContributionsPerPartition must be positive. Provided value: " + "${params.maxContributionsPerPartition}." } - if (params.contributionBoundingLevel.withContributionsPerPartitionBounding) { - require( - params.maxContributionsPerPartition != null || - params.maxContributions != null || - (params.minTotalValue != null && params.maxTotalValue != null) || - params.vectorMaxTotalNorm != null - ) { - "maxContributionsPerPartition or maxContributions or (minTotalValue, maxTotalValue) or vectorMaxTotalNorm must be set because specified ${params.contributionBoundingLevel} contribution bounding level requires per partition bounding." - } - } + // Max contributions. require(isGreaterThanZeroIfSet(params.maxContributions)) { "maxContributions must be positive. Provided value: " + "${params.maxContributions}." @@ -251,52 +239,90 @@ fun validateAggregationParams( "Provided values: maxContributions=${params.maxContributions}, " + "maxContributionsPerPartition=${params.maxContributionsPerPartition}." } + + // Required parameters per each metric. + if (params.contributionBoundingLevel.withContributionsPerPartitionBounding) { + val perPartitionBoundsSet = params.maxContributionsPerPartition != null + val crossPartitionBoundsSet = params.maxContributions != null + val totalValueBoundsSet = + params.features.any { + (it is ScalarFeatureSpec && it.minTotalValue != null && it.maxTotalValue != null) || + it is VectorFeatureSpec + } + require(perPartitionBoundsSet || crossPartitionBoundsSet || totalValueBoundsSet) { + "maxContributionsPerPartition or maxContributions or (minTotalValue, maxTotalValue) or vectorMaxTotalNorm must be set because specified ${params.contributionBoundingLevel} contribution bounding level requires per partition bounding" + } + } + + if (metricIsRequested(COUNT::class, params.nonFeatureMetrics)) { + require(params.maxContributionsPerPartition != null || params.maxContributions != null) { + "maxContributionsPerPartition or maxContributions must be set for COUNT metric." + } + } + + for (feature in params.features) { + when (feature) { + is ScalarFeatureSpec -> validateScalarFeature(params, feature) + is VectorFeatureSpec -> validateVectorFeature(params, feature) + } + } + + // Partition selection + if (usePublicPartitions) { + require(params.partitionSelectionBudget == null) { + "partitionSelectionBudget can not be set for public partitions." + } + } + + // ValueExtractor: only COUNT and PRIVACY_ID_COUNT can be computed w/o a value extractor. + if (!hasValueExtractor) { + require(featureMetrics.isEmpty()) { + "Metrics ${featureMetrics.map { it.type }} require a value extractor." + } + } +} + +private fun validateScalarFeature(params: AggregationParams, feature: ScalarFeatureSpec) { // Min/Max bounds - require(sameNullability(params.minValue, params.maxValue)) { + require(sameNullability(feature.minValue, feature.maxValue)) { "minValue and maxValue must be simultaneously equal or not equal to null. Provided values: " + - "minValue=${params.minValue}, maxValue=${params.maxValue}." + "minValue=${feature.minValue}, maxValue=${feature.maxValue}." } var areMinMaxValuesSet = false - if (params.minValue != null && params.maxValue != null) { + if (feature.minValue != null && feature.maxValue != null) { areMinMaxValuesSet = true - require(params.minValue < params.maxValue) { + require(feature.minValue < feature.maxValue) { "minValue must be less than maxValue. Provided values: " + - "minValue=${params.minValue}, maxValue=${params.maxValue}." + "minValue=${feature.minValue}, maxValue=${feature.maxValue}." } } - require(sameNullability(params.minTotalValue, params.maxTotalValue)) { + require(sameNullability(feature.minTotalValue, feature.maxTotalValue)) { "minTotalValue and maxTotalValue must be simultaneously equal or not equal to null. " + - "Provided values: minTotalValue=${params.minTotalValue}, " + - "maxTotalValue=${params.maxTotalValue}." + "Provided values: minTotalValue=${feature.minTotalValue}, " + + "maxTotalValue=${feature.maxTotalValue}." } var areMinMaxTotalValuesSet = false - if (params.minTotalValue != null && params.maxTotalValue != null) { + if (feature.minTotalValue != null && feature.maxTotalValue != null) { areMinMaxTotalValuesSet = true - require(params.minTotalValue <= params.maxTotalValue) { + require(feature.minTotalValue <= feature.maxTotalValue) { "minTotalValue must be less or equal to maxTotalValue. Provided values: " + - "minTotalValue=${params.minTotalValue}, maxTotalValue=${params.maxTotalValue}." + "minTotalValue=${feature.minTotalValue}, maxTotalValue=${feature.maxTotalValue}." } } - // Required parameters per each metric. - if (metricIsRequested(COUNT::class, params)) { - require(params.maxContributionsPerPartition != null || params.maxContributions != null) { - "maxContributionsPerPartition or maxContributions must be set for COUNT metric." - } - } // When MEAN and SUM are set together, then contribution bounding with (minValue, maxValue) // is used. SUM and VARIANCE should not be set together. if ( - metricIsRequested(SUM::class, params) && - !metricIsRequested(MEAN::class, params) && - !metricIsRequested(VARIANCE::class, params) + metricIsRequested(SUM::class, feature.metrics) && + !metricIsRequested(MEAN::class, feature.metrics) && + !metricIsRequested(VARIANCE::class, feature.metrics) ) { require(areMinMaxTotalValuesSet) { "(minTotalValue, maxTotalValue) must be set for SUM metrics." } } - if (metricIsRequested(MEAN::class, params)) { + if (metricIsRequested(MEAN::class, feature.metrics)) { require(params.maxContributionsPerPartition != null || params.maxContributions != null) { "maxContributionsPerPartition or maxContributions must be set for MEAN metric." } @@ -306,25 +332,25 @@ fun validateAggregationParams( } } require( - params.metrics.find { it.type == COUNT }?.budgetSpec == null || - params.metrics.find { it.type == MEAN }?.budgetSpec == null + params.nonFeatureMetrics.find { it.type == COUNT }?.budgetSpec == null || + feature.metrics.find { it.type == MEAN }?.budgetSpec == null ) { "BudgetPerOpSpec can not be set for both COUNT and MEAN metrics." } require( - params.metrics.find { it.type == SUM }?.budgetSpec == null || - params.metrics.find { it.type == MEAN }?.budgetSpec === null + feature.metrics.find { it.type == SUM }?.budgetSpec == null || + feature.metrics.find { it.type == MEAN }?.budgetSpec === null ) { "BudgetPerOpSpec can not be set for both SUM and MEAN metrics." } require( - params.metrics.find { it.type == MEAN }?.budgetSpec == null || - params.metrics.find { it.type == VARIANCE }?.budgetSpec == null + feature.metrics.find { it.type == MEAN }?.budgetSpec == null || + feature.metrics.find { it.type == VARIANCE }?.budgetSpec == null ) { "BudgetPerOpSpec can not be set for both MEAN and VARIANCE metrics." } // Validation for VARIANCE metric. - if (metricIsRequested(VARIANCE::class, params)) { + if (metricIsRequested(VARIANCE::class, feature.metrics)) { require(params.maxContributionsPerPartition != null || params.maxContributions != null) { "maxContributionsPerPartition or maxContributions must be set for VARIANCE metric." } @@ -334,67 +360,40 @@ fun validateAggregationParams( } } require( - params.metrics.find { it.type == SUM }?.budgetSpec == null || - params.metrics.find { it.type == VARIANCE }?.budgetSpec == null + feature.metrics.find { it.type == SUM }?.budgetSpec == null || + feature.metrics.find { it.type == VARIANCE }?.budgetSpec == null ) { "BudgetPerOpSpec can not be set for both SUM and VARIANCE metrics." } require( - params.metrics.find { it.type == COUNT }?.budgetSpec == null || - params.metrics.find { it.type == VARIANCE }?.budgetSpec == null + params.nonFeatureMetrics.find { it.type == COUNT }?.budgetSpec == null || + feature.metrics.find { it.type == VARIANCE }?.budgetSpec == null ) { "BudgetPerOpSpec can not be set for both COUNT and VARIANCE metrics." } // Validation for QUANTILES metric. - if (metricIsRequested(QUANTILES::class, params)) { + if (metricIsRequested(QUANTILES::class, feature.metrics)) { require(params.maxContributionsPerPartition != null) { "maxContributionsPerPartition must be set for QUANTILES metric." } require(areMinMaxValuesSet) { "(minValue, maxValue) must be set for QUANTILES metric." } } +} + +private fun validateVectorFeature(params: AggregationParams, feature: VectorFeatureSpec) { // Validation for VECTOR_SUM metric. - if (metricIsRequested(VECTOR_SUM::class, params)) { - require(params.vectorNormKind != null) { "vectorNormKind must be set for VECTOR_SUM metric." } + if (metricIsRequested(VECTOR_SUM::class, feature.metrics)) { when (params.noiseKind) { NoiseKind.LAPLACE -> - require(params.vectorNormKind in listOf(NormKind.L_INF, NormKind.L1)) { - "vectorNormKind must be L_INF or L1 for LAPLACE noise. Provided value: ${params.vectorNormKind}." + require(feature.normKind in listOf(NormKind.L_INF, NormKind.L1)) { + "vectorNormKind must be L_INF or L1 for LAPLACE noise. Provided value: ${feature.normKind}." } NoiseKind.GAUSSIAN -> - require(params.vectorNormKind in listOf(NormKind.L_INF, NormKind.L2)) { - "vectorNormKind must be L_INF or L2 for GAUSSIAN noise. Provided value: ${params.vectorNormKind}." + require(feature.normKind in listOf(NormKind.L_INF, NormKind.L2)) { + "vectorNormKind must be L_INF or L2 for GAUSSIAN noise. Provided value: ${feature.normKind}." } } - require(params.vectorMaxTotalNorm != null) { - "vectorMaxTotalNorm must be set for VECTOR_SUM metric." - } - require(params.vectorSize != null) { "vectorSize must be set for VECTOR_SUM metric." } - - require( - !metricIsRequested(SUM::class, params) && - !metricIsRequested(MEAN::class, params) && - !metricIsRequested(VARIANCE::class, params) && - !metricIsRequested(QUANTILES::class, params) - ) { - "VECTOR_SUM can not be computed together with scalar metrics such as SUM, MEAN, VARIANCE and QUANTILES." - } - } - - // Partition selection - if (usePublicPartitions) { - require(params.partitionSelectionBudget == null) { - "partitionSelectionBudget can not be set for public partitions." - } - } - - // ValueExtractor: only COUNT and PRIVACY_ID_COUNT can be computed w/o a value extractor. - if (!hasValueExtractor) { - val metricsWhichRequireValueExtractor = - params.metrics.map { it.type }.filter { it != COUNT && it != PRIVACY_ID_COUNT } - require(metricsWhichRequireValueExtractor.isEmpty()) { - "Metrics $metricsWhichRequireValueExtractor require a value extractor." - } } } @@ -512,6 +511,69 @@ enum class ExecutionMode( ), } +/** + * Represents a feature for which DP metrics are calculated. + * + * A feature is a characteristic of the input data. For example, in a dataset of user activities, a + * feature could be "time spent on page" or "user embedding". This interface and its implementations + * are used to specify parameters for metrics calculated on these features. + */ +@Immutable +sealed interface FeatureSpec : Serializable { + /** A unique identifier for the feature. */ + val featureId: String + /** The list of DP metrics to be computed for this feature. */ + val metrics: ImmutableList +} + +/** + * A [FeatureSpec] for scalar-valued features. + * + * This is used for features where each data point is a single numerical value (e.g., a Double). It + * is suitable for metrics like [MetricType.SUM], [MetricType.MEAN], [MetricType.VARIANCE], and + * [MetricType.QUANTILES]. + * + * @property minValue The minimum value that a single contribution can take. + * @property maxValue The maximum value that a single contribution can take. + * @property minTotalValue The minimum total value that contributions from a single privacy unit can + * sum up to per partition. Must be set if [MetricType.SUM] is requested and neither + * [MetricType.MEAN] nor [MetricType.VARIANCE] is requested; otherwise, [minValue] and [maxValue] + * must be set. + * @property maxTotalValue The maximum total value that contributions from a single privacy unit can + * sum up to per partition. Must be set if [MetricType.SUM] is requested and neither + * [MetricType.MEAN] nor [MetricType.VARIANCE] is requested; otherwise, [minValue] and [maxValue] + * must be set. + */ +@Immutable +data class ScalarFeatureSpec( + override val featureId: String, + override val metrics: ImmutableList, + val minValue: Double? = null, + val maxValue: Double? = null, + val minTotalValue: Double? = null, + val maxTotalValue: Double? = null, +) : FeatureSpec, Serializable + +/** + * A [FeatureSpec] for vector-valued features. + * + * This is used for features where each data point is a vector of numerical values (e.g., an + * embedding). It is suitable for metrics like [MetricType.VECTOR_SUM]. + * + * @property vectorSize The size of the vector. + * @property normKind The type of norm to use for contribution bounding. + * @property vectorMaxTotalNorm The maximum total norm of contributions from a single privacy unit + * per partition. + */ +@Immutable +data class VectorFeatureSpec( + override val featureId: String, + override val metrics: ImmutableList, + val vectorSize: Int, + val normKind: NormKind, + val vectorMaxTotalNorm: Double, +) : FeatureSpec, Serializable + /** The definition of the DP metric to compute. */ @Immutable data class MetricDefinition( @@ -525,6 +587,8 @@ data class MetricDefinition( val budgetSpec: BudgetPerOpSpec? = null, ) : Serializable +// TODO: have 2 types of MetricType feature and non feature for better code +// readability and remove complicated checks. /** The types of metrics that can be anonymized. */ @Immutable sealed class MetricType : Serializable { @@ -570,8 +634,10 @@ private fun sameNullability(a: Double?, b: Double?): Boolean { return (a == null) == (b == null) } -private fun metricIsRequested(metricTypeClass: KClass, params: AggregationParams) = - params.metrics.any { metricTypeClass.isInstance(it.type) } +private fun metricIsRequested( + metricTypeClass: KClass, + metrics: Collection, +) = metrics.any { metricTypeClass.isInstance(it.type) } private fun isGreaterThanZeroIfSet(value: Int?): Boolean = value == null || value > 0 diff --git a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/CompoundCombinerTest.kt b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/CompoundCombinerTest.kt index b532db6d..ebbdc842 100644 --- a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/CompoundCombinerTest.kt +++ b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/CompoundCombinerTest.kt @@ -42,37 +42,58 @@ import org.junit.runners.JUnit4 class CompoundCombinerTest { private val COUNT_PARAMS = AggregationParams( - metrics = ImmutableList.of(MetricDefinition(COUNT)), + nonFeatureMetrics = ImmutableList.of(MetricDefinition(COUNT)), noiseKind = NoiseKind.GAUSSIAN, maxPartitionsContributed = Int.MAX_VALUE, maxContributionsPerPartition = Int.MAX_VALUE, ) private val COUNT_AND_SUM_PARAMS = AggregationParams( - metrics = ImmutableList.of(MetricDefinition(COUNT), MetricDefinition(SUM)), + nonFeatureMetrics = ImmutableList.of(MetricDefinition(COUNT)), + features = + ImmutableList.of( + ScalarFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(SUM)), + minTotalValue = -Double.MAX_VALUE, + maxTotalValue = Double.MAX_VALUE, + ) + ), noiseKind = NoiseKind.GAUSSIAN, maxPartitionsContributed = Int.MAX_VALUE, maxContributionsPerPartition = Int.MAX_VALUE, - minTotalValue = -Double.MAX_VALUE, - maxTotalValue = Double.MAX_VALUE, ) private val COUNT_AND_MEAN_PARAMS = AggregationParams( - metrics = ImmutableList.of(MetricDefinition(COUNT), MetricDefinition(MEAN)), + nonFeatureMetrics = ImmutableList.of(MetricDefinition(COUNT)), + features = + ImmutableList.of( + ScalarFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(MEAN)), + minValue = -100.0, + maxValue = 100.0, + ) + ), noiseKind = NoiseKind.GAUSSIAN, maxPartitionsContributed = 100, maxContributionsPerPartition = 10, - minValue = -100.0, - maxValue = 100.0, ) private val COUNT_AND_VARIANCE_PARAMS = AggregationParams( - metrics = ImmutableList.of(MetricDefinition(COUNT), MetricDefinition(VARIANCE)), + nonFeatureMetrics = ImmutableList.of(MetricDefinition(COUNT)), + features = + ImmutableList.of( + ScalarFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(VARIANCE)), + minValue = -100.0, + maxValue = 100.0, + ) + ), noiseKind = NoiseKind.GAUSSIAN, maxPartitionsContributed = 100, maxContributionsPerPartition = 10, - minValue = -100.0, - maxValue = 100.0, ) private val UNUSED_ALLOCATED_BUDGET = AllocatedBudget() @@ -119,6 +140,7 @@ class CompoundCombinerTest { UNUSED_ALLOCATED_BUDGET, NoiseFactory(), ExecutionMode.PRODUCTION, + COUNT_AND_SUM_PARAMS.features[0] as ScalarFeatureSpec, ), ) ) @@ -148,6 +170,7 @@ class CompoundCombinerTest { UNUSED_ALLOCATED_BUDGET, NoiseFactory(), ExecutionMode.PRODUCTION, + COUNT_AND_MEAN_PARAMS.features[0] as ScalarFeatureSpec, ) ) ) @@ -174,12 +197,13 @@ class CompoundCombinerTest { CompoundCombiner( listOf( VarianceCombiner( - COUNT_AND_MEAN_PARAMS, + COUNT_AND_VARIANCE_PARAMS, UNUSED_ALLOCATED_BUDGET, UNUSED_ALLOCATED_BUDGET, UNUSED_ALLOCATED_BUDGET, NoiseFactory(), ExecutionMode.PRODUCTION, + COUNT_AND_VARIANCE_PARAMS.features[0] as ScalarFeatureSpec, ) ) ) @@ -217,6 +241,7 @@ class CompoundCombinerTest { UNUSED_ALLOCATED_BUDGET, NoiseFactory(), ExecutionMode.PRODUCTION, + COUNT_AND_SUM_PARAMS.features[0] as ScalarFeatureSpec, ), ) ) @@ -277,6 +302,7 @@ class CompoundCombinerTest { UNUSED_ALLOCATED_BUDGET, NoiseFactory(), ExecutionMode.PRODUCTION, + COUNT_AND_MEAN_PARAMS.features[0] as ScalarFeatureSpec, ) ) ) @@ -320,6 +346,7 @@ class CompoundCombinerTest { UNUSED_ALLOCATED_BUDGET, NoiseFactory(), ExecutionMode.PRODUCTION, + COUNT_AND_VARIANCE_PARAMS.features[0] as ScalarFeatureSpec, ) ) ) @@ -370,6 +397,7 @@ class CompoundCombinerTest { UNUSED_ALLOCATED_BUDGET, ZeroNoiseFactory(), ExecutionMode.PRODUCTION, + COUNT_AND_SUM_PARAMS.features[0] as ScalarFeatureSpec, ), ) ) @@ -415,21 +443,23 @@ class CompoundCombinerTest { @Test fun computeMetrics_meanCombiner_returnsMeanMetric() { - val compoundCombiner = + val params = COUNT_AND_MEAN_PARAMS.copy(nonFeatureMetrics = ImmutableList.of()) + val combiner = CompoundCombiner( listOf( MeanCombiner( - COUNT_AND_MEAN_PARAMS.copy(metrics = ImmutableList.of(MetricDefinition(MEAN))), + params, UNUSED_ALLOCATED_BUDGET, UNUSED_ALLOCATED_BUDGET, ZeroNoiseFactory(), ExecutionMode.PRODUCTION, + params.features[0] as ScalarFeatureSpec, ) ) ) val dpAggregates = - compoundCombiner.computeMetrics( + combiner.computeMetrics( compoundAccumulator { meanAccumulator = meanAccumulator { count = 3 @@ -443,28 +473,35 @@ class CompoundCombinerTest { @Test fun computeMetrics_meanCombiner_returnsCountSumMean() { - val compoundCombiner = + val params = + COUNT_AND_MEAN_PARAMS.copy( + nonFeatureMetrics = ImmutableList.of(MetricDefinition(COUNT)), + features = + ImmutableList.of( + ScalarFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(MEAN), MetricDefinition(SUM)), + minValue = -100.0, + maxValue = 100.0, + ) + ), + ) + val combiner = CompoundCombiner( listOf( MeanCombiner( - COUNT_AND_MEAN_PARAMS.copy( - metrics = - ImmutableList.of( - MetricDefinition(MEAN), - MetricDefinition(COUNT), - MetricDefinition(SUM), - ) - ), + params, UNUSED_ALLOCATED_BUDGET, UNUSED_ALLOCATED_BUDGET, ZeroNoiseFactory(), ExecutionMode.PRODUCTION, + params.features[0] as ScalarFeatureSpec, ) ) ) val dpAggregates = - compoundCombiner.computeMetrics( + combiner.computeMetrics( compoundAccumulator { meanAccumulator = meanAccumulator { count = 3 @@ -485,22 +522,24 @@ class CompoundCombinerTest { @Test fun computeMetrics_varianceCombiner_returnsVarianceMetric() { - val compoundCombiner = + val params = COUNT_AND_VARIANCE_PARAMS.copy(nonFeatureMetrics = ImmutableList.of()) + val combiner = CompoundCombiner( listOf( VarianceCombiner( - COUNT_AND_VARIANCE_PARAMS.copy(metrics = ImmutableList.of(MetricDefinition(VARIANCE))), + params, UNUSED_ALLOCATED_BUDGET, UNUSED_ALLOCATED_BUDGET, UNUSED_ALLOCATED_BUDGET, ZeroNoiseFactory(), ExecutionMode.PRODUCTION, + params.features[0] as ScalarFeatureSpec, ) ) ) val dpAggregates = - compoundCombiner.computeMetrics( + combiner.computeMetrics( compoundAccumulator { varianceAccumulator = varianceAccumulator { count = 10 @@ -515,30 +554,41 @@ class CompoundCombinerTest { @Test fun computeMetrics_varianceCombiner_returnsCountSumMeanVariance() { - val compoundCombiner = - CompoundCombiner( - listOf( - VarianceCombiner( - COUNT_AND_VARIANCE_PARAMS.copy( + val params = + COUNT_AND_VARIANCE_PARAMS.copy( + nonFeatureMetrics = ImmutableList.of(MetricDefinition(COUNT)), + features = + ImmutableList.of( + ScalarFeatureSpec( + featureId = "value", metrics = ImmutableList.of( MetricDefinition(MEAN), - MetricDefinition(COUNT), MetricDefinition(SUM), MetricDefinition(VARIANCE), - ) - ), + ), + minValue = -100.0, + maxValue = 100.0, + ) + ), + ) + val combiner = + CompoundCombiner( + listOf( + VarianceCombiner( + params, UNUSED_ALLOCATED_BUDGET, UNUSED_ALLOCATED_BUDGET, UNUSED_ALLOCATED_BUDGET, ZeroNoiseFactory(), ExecutionMode.PRODUCTION, + params.features[0] as ScalarFeatureSpec, ) ) ) val dpAggregates = - compoundCombiner.computeMetrics( + combiner.computeMetrics( compoundAccumulator { varianceAccumulator = varianceAccumulator { count = 10 diff --git a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/CountCombinerTest.kt b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/CountCombinerTest.kt index faff1082..72f4f3bc 100644 --- a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/CountCombinerTest.kt +++ b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/CountCombinerTest.kt @@ -41,7 +41,7 @@ import org.mockito.kotlin.verify class CountCombinerTest { private val AGG_PARAMS = AggregationParams( - metrics = ImmutableList.of(MetricDefinition(COUNT)), + nonFeatureMetrics = ImmutableList.of(MetricDefinition(COUNT)), noiseKind = GAUSSIAN, maxPartitionsContributed = 3, maxContributionsPerPartition = 5, @@ -156,7 +156,7 @@ class CountCombinerTest { fun computeMetrics_addsNoise(noiseKind: NoiseKind, delta: Double) { val paramsWithNoise = AggregationParams( - metrics = ImmutableList.of(MetricDefinition(COUNT)), + nonFeatureMetrics = ImmutableList.of(MetricDefinition(COUNT)), noiseKind = noiseKind, maxPartitionsContributed = 30, maxContributionsPerPartition = 50, @@ -175,7 +175,7 @@ class CountCombinerTest { fun computeMetrics_passesCorrectParametersToNoise() { val params = AggregationParams( - metrics = ImmutableList.of(MetricDefinition(COUNT)), + nonFeatureMetrics = ImmutableList.of(MetricDefinition(COUNT)), noiseKind = GAUSSIAN, maxPartitionsContributed = 3, maxContributionsPerPartition = 5, diff --git a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/DpEngineTest.kt b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/DpEngineTest.kt index 5753b863..0ea1c3f2 100644 --- a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/DpEngineTest.kt +++ b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/DpEngineTest.kt @@ -104,12 +104,14 @@ class DpEngineTest { .aggregate( LocalCollection(sequenceOf()), // empty metrics are not allowed - COUNT_PARAMS.copy(metrics = ImmutableList.of()), + COUNT_PARAMS.copy(nonFeatureMetrics = ImmutableList.of()), testDataExtractors, LocalCollection(sequenceOf()), ) } - assertThat(e).hasMessageThat().contains("metrics must not be empty") + assertThat(e) + .hasMessageThat() + .contains("At least one of nonFeatureMetrics or features must be specified.") } @Test @@ -188,7 +190,17 @@ class DpEngineTest { ) val publicPartitions = LocalCollection(sequenceOf("US")) val dpEngine = DpEngine.createForTesting(LOCAL_EF, LARGE_BUDGET_SPEC, ZeroNoiseFactory()) - val params = COUNT_AND_SUM_PARAMS.copy(maxContributionsPerPartition = 2, maxTotalValue = 30.0) + val params = + COUNT_AND_SUM_PARAMS.copy( + maxContributionsPerPartition = 2, + features = + ImmutableList.of( + (COUNT_AND_SUM_PARAMS.features[0] as ScalarFeatureSpec).copy( + minTotalValue = -Double.MAX_VALUE, + maxTotalValue = 30.0, + ) + ), + ) val dpAggregates = dpEngine.aggregate(inputData, params, testDataExtractors, publicPartitions) @@ -220,7 +232,16 @@ class DpEngineTest { ) val publicPartitions = LocalCollection(sequenceOf("US", "NL")) val dpEngine = DpEngine.createForTesting(LOCAL_EF, LARGE_BUDGET_SPEC, ZeroNoiseFactory()) - val params = COUNT_AND_SUM_PARAMS.copy(minTotalValue = -25.0, maxTotalValue = 25.0) + val params = + COUNT_AND_SUM_PARAMS.copy( + features = + ImmutableList.of( + (COUNT_AND_SUM_PARAMS.features[0] as ScalarFeatureSpec).copy( + minTotalValue = -25.0, + maxTotalValue = 25.0, + ) + ) + ) val dpAggregates = dpEngine.aggregate(inputData, params, testDataExtractors, publicPartitions) @@ -255,17 +276,23 @@ class DpEngineTest { // Use low bounds to avoid sensitivity overflow when adding noise. val params = AggregationParams( - metrics = + nonFeatureMetrics = ImmutableList.of( MetricDefinition(COUNT, AbsoluteBudgetPerOpSpec(0.1, 1e-5)), - MetricDefinition(SUM, AbsoluteBudgetPerOpSpec(0.1, 1e-5)), MetricDefinition(PRIVACY_ID_COUNT, AbsoluteBudgetPerOpSpec(0.1, 1e-5)), ), noiseKind = GAUSSIAN, maxPartitionsContributed = 5, maxContributionsPerPartition = 5, - minTotalValue = -5.0, - maxTotalValue = 5.0, + features = + ImmutableList.of( + ScalarFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(SUM, AbsoluteBudgetPerOpSpec(0.1, 1e-5))), + minTotalValue = -5.0, + maxTotalValue = 5.0, + ) + ), ) val dpAggregates = @@ -293,13 +320,19 @@ class DpEngineTest { // Use low bounds to avoid sensitivity overflow when adding noise. val params = AggregationParams( - metrics = - ImmutableList.of(MetricDefinition(COUNT), MetricDefinition(SUM), MetricDefinition(MEAN)), + nonFeatureMetrics = ImmutableList.of(MetricDefinition(COUNT)), + features = + ImmutableList.of( + ScalarFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(SUM), MetricDefinition(MEAN)), + minValue = -2.0, + maxValue = 2.0, + ) + ), noiseKind = LAPLACE, maxPartitionsContributed = 1, maxContributionsPerPartition = 1, - minValue = -2.0, - maxValue = 2.0, ) val dpAggregates = @@ -322,18 +355,24 @@ class DpEngineTest { // Use low bounds to avoid sensitivity overflow when adding noise. val params = AggregationParams( - metrics = + nonFeatureMetrics = ImmutableList.of(MetricDefinition(COUNT)), + features = ImmutableList.of( - MetricDefinition(COUNT), - MetricDefinition(SUM), - MetricDefinition(MEAN), - MetricDefinition(VARIANCE), + ScalarFeatureSpec( + featureId = "value", + metrics = + ImmutableList.of( + MetricDefinition(SUM), + MetricDefinition(MEAN), + MetricDefinition(VARIANCE), + ), + minValue = -2.0, + maxValue = 2.0, + ) ), noiseKind = LAPLACE, maxPartitionsContributed = 1, maxContributionsPerPartition = 1, - minValue = -2.0, - maxValue = 2.0, ) val dpAggregates = dpEngine.aggregate(inputData, params, testDataExtractors, publicPartitions) @@ -426,18 +465,24 @@ class DpEngineTest { AggregationParams( contributionBoundingLevel = DATASET_LEVEL, noiseKind = GAUSSIAN, - metrics = + nonFeatureMetrics = + ImmutableList.of(MetricDefinition(PRIVACY_ID_COUNT), MetricDefinition(COUNT)), + features = ImmutableList.of( - MetricDefinition(PRIVACY_ID_COUNT), - MetricDefinition(COUNT), - MetricDefinition(SUM), - MetricDefinition(MEAN), - MetricDefinition(QUANTILES(ranks = ImmutableList.of())), + ScalarFeatureSpec( + featureId = "value", + metrics = + ImmutableList.of( + MetricDefinition(SUM), + MetricDefinition(MEAN), + MetricDefinition(QUANTILES(ranks = ImmutableList.of())), + ), + minValue = -10.0, + maxValue = 10.0, + ) ), maxPartitionsContributed = 10, maxContributionsPerPartition = 20, - minValue = -10.0, - maxValue = 10.0, ), PartitionAndPerPartitionSampler::class.java, ), @@ -471,18 +516,24 @@ class DpEngineTest { AggregationParams( contributionBoundingLevel = PARTITION_LEVEL, noiseKind = GAUSSIAN, - metrics = + nonFeatureMetrics = + ImmutableList.of(MetricDefinition(PRIVACY_ID_COUNT), MetricDefinition(COUNT)), + features = ImmutableList.of( - MetricDefinition(PRIVACY_ID_COUNT), - MetricDefinition(COUNT), - MetricDefinition(SUM), - MetricDefinition(MEAN), - MetricDefinition(QUANTILES(ranks = ImmutableList.of())), + ScalarFeatureSpec( + featureId = "value", + metrics = + ImmutableList.of( + MetricDefinition(SUM), + MetricDefinition(MEAN), + MetricDefinition(QUANTILES(ranks = ImmutableList.of())), + ), + minValue = -10.0, + maxValue = 10.0, + ) ), maxPartitionsContributed = 1, maxContributionsPerPartition = 20, - minValue = -10.0, - maxValue = 10.0, ), PerPartitionContributionsSampler::class.java, ), @@ -890,18 +941,22 @@ class DpEngineTest { // Use low bounds to avoid sensitivity overflow when adding noise. val params = AggregationParams( - metrics = + nonFeatureMetrics = + ImmutableList.of(MetricDefinition(COUNT, testCase.requestedCountBudget)), + features = ImmutableList.of( - MetricDefinition(COUNT, testCase.requestedCountBudget), - MetricDefinition(SUM, testCase.requestedSumBudget), + ScalarFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(SUM, testCase.requestedSumBudget)), + minTotalValue = -100.0, + maxTotalValue = 100.0, + ) ), noiseKind = GAUSSIAN, // Choose large values to avoid contribution clamping but keep the values low enough to // avoid sensitivity overflow. maxPartitionsContributed = 100, maxContributionsPerPartition = 100, - minTotalValue = -100.0, - maxTotalValue = 100.0, ) val result = @@ -1032,17 +1087,24 @@ class DpEngineTest { // Use low bounds to avoid sensitivity overflow when adding noise. val params = AggregationParams( - metrics = + nonFeatureMetrics = + ImmutableList.of(MetricDefinition(COUNT, testCase.requestedCountBudget)), + features = ImmutableList.of( - MetricDefinition(COUNT, testCase.requestedCountBudget), - MetricDefinition(SUM, testCase.requestedSumBudget), - MetricDefinition(MEAN, testCase.requestedMeanBudget), + ScalarFeatureSpec( + featureId = "value", + metrics = + ImmutableList.of( + MetricDefinition(SUM, testCase.requestedSumBudget), + MetricDefinition(MEAN, testCase.requestedMeanBudget), + ), + minValue = -10.0, + maxValue = 10.0, + ) ), noiseKind = GAUSSIAN, maxPartitionsContributed = 10, maxContributionsPerPartition = 20, - minValue = -10.0, - maxValue = 10.0, ) val result = @@ -1133,12 +1195,20 @@ class DpEngineTest { // Use low bounds to avoid sensitivity overflow when adding noise. val params = AggregationParams( - metrics = ImmutableList.of(MetricDefinition(VARIANCE, testCase.requestedVarianceBudget)), + nonFeatureMetrics = ImmutableList.of(), + features = + ImmutableList.of( + ScalarFeatureSpec( + featureId = "value", + metrics = + ImmutableList.of(MetricDefinition(VARIANCE, testCase.requestedVarianceBudget)), + minValue = -10.0, + maxValue = 10.0, + ) + ), noiseKind = GAUSSIAN, maxPartitionsContributed = 10, maxContributionsPerPartition = 20, - minValue = -10.0, - maxValue = 10.0, ) val result = @@ -1189,57 +1259,87 @@ class DpEngineTest { DpEngineBudgetSpec(budget = TotalBudget(epsilon = 2000.0, delta = 0.999999)) private val PRIVACY_ID_COUNT_PARAMS = AggregationParams( - metrics = ImmutableList.of(MetricDefinition(PRIVACY_ID_COUNT)), + nonFeatureMetrics = ImmutableList.of(MetricDefinition(PRIVACY_ID_COUNT)), noiseKind = GAUSSIAN, maxPartitionsContributed = 1_000_000, maxContributionsPerPartition = 1_000_000, ) private val COUNT_PARAMS = AggregationParams( - metrics = ImmutableList.of(MetricDefinition(COUNT)), + nonFeatureMetrics = ImmutableList.of(MetricDefinition(COUNT)), noiseKind = GAUSSIAN, maxPartitionsContributed = 1_000_000, maxContributionsPerPartition = 1_000_000, ) private val SUM_PARAMS = AggregationParams( - metrics = ImmutableList.of(MetricDefinition(SUM)), + nonFeatureMetrics = ImmutableList.of(), + features = + ImmutableList.of( + ScalarFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(SUM)), + minTotalValue = -Double.MAX_VALUE, + maxTotalValue = Double.MAX_VALUE, + ) + ), noiseKind = GAUSSIAN, maxPartitionsContributed = 1_000_000, - minTotalValue = -Double.MAX_VALUE, - maxTotalValue = Double.MAX_VALUE, ) private val COUNT_AND_SUM_PARAMS = AggregationParams( - metrics = ImmutableList.of(MetricDefinition(COUNT), MetricDefinition(SUM)), + nonFeatureMetrics = ImmutableList.of(MetricDefinition(COUNT)), + features = + ImmutableList.of( + ScalarFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(SUM)), + minTotalValue = -100.0, + maxTotalValue = 100.0, + ) + ), noiseKind = GAUSSIAN, // Choose large values to avoid contribution clamping but keep the values low enough to // avoid sensitivity overflow. maxPartitionsContributed = 100, maxContributionsPerPartition = 100, - minTotalValue = -100.0, - maxTotalValue = 100.0, ) private val MEAN_PARAMS = AggregationParams( - metrics = ImmutableList.of(MetricDefinition(MEAN)), + nonFeatureMetrics = ImmutableList.of(), + features = + ImmutableList.of( + ScalarFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(MEAN)), + minValue = -10.0, + maxValue = 10.0, + ) + ), noiseKind = GAUSSIAN, maxPartitionsContributed = 10, maxContributionsPerPartition = 20, - minValue = -10.0, - maxValue = 10.0, ) private val QUANTILES_PARAMS = AggregationParams( - metrics = + nonFeatureMetrics = ImmutableList.of(), + features = ImmutableList.of( - MetricDefinition(QUANTILES(ranks = ImmutableList.of(0.0001, 0.0, 0.5, 0.999, 1.0))) + ScalarFeatureSpec( + featureId = "value", + metrics = + ImmutableList.of( + MetricDefinition( + QUANTILES(ranks = ImmutableList.of(0.0001, 0.0, 0.5, 0.999, 1.0)) + ) + ), + minValue = -10.0, + maxValue = 10.0, + ) ), noiseKind = GAUSSIAN, maxPartitionsContributed = 10, maxContributionsPerPartition = 20, - minValue = -10.0, - maxValue = 10.0, ) private val LOCAL_EF = LocalEncoderFactory() } diff --git a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/DpFunctionsParamsTest.kt b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/DpFunctionsParamsTest.kt index 7bc5f472..1c33b01c 100644 --- a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/DpFunctionsParamsTest.kt +++ b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/DpFunctionsParamsTest.kt @@ -56,12 +56,16 @@ class DpFunctionsParamsTest { ) validateAggregationParams( AGGREGATION_PARAMS.copy( + nonFeatureMetrics = ImmutableList.of(), maxContributionsPerPartition = null, - metrics = ImmutableList.of(MetricDefinition(SUM)), - minValue = null, - maxValue = null, - minTotalValue = 1.0, - maxTotalValue = 2.0, + features = + ImmutableList.of( + createScalarFeatureSpec( + nonFeatureMetrics = ImmutableList.of(MetricDefinition(SUM)), + minTotalValue = 1.0, + maxTotalValue = 2.0, + ) + ), ), usePublicPartitions = true, hasValueExtractor = true, @@ -69,15 +73,17 @@ class DpFunctionsParamsTest { validateAggregationParams( AGGREGATION_PARAMS.copy( maxContributionsPerPartition = null, - metrics = ImmutableList.of(MetricDefinition(VECTOR_SUM)), + nonFeatureMetrics = ImmutableList.of(), + features = + ImmutableList.of( + createVectorFeatureSpec( + nonFeatureMetrics = ImmutableList.of(MetricDefinition(VECTOR_SUM)), + vectorSize = 2, + vectorMaxTotalNorm = 1.0, + normKind = NormKind.L1, + ) + ), noiseKind = NoiseKind.LAPLACE, - minValue = null, - maxValue = null, - minTotalValue = null, - maxTotalValue = null, - vectorSize = 2, - vectorMaxTotalNorm = 1.0, - vectorNormKind = NormKind.L1, ), usePublicPartitions = false, hasValueExtractor = true, @@ -94,6 +100,11 @@ class DpFunctionsParamsTest { aggregationParams = AGGREGATION_PARAMS.copy(maxPartitionsContributed = 0), exceptionMessage = "maxPartitionsContributed must be positive. Provided value: 0.", ), + OVER_LIMIT_MAX_PARTITIONS_CONTRIBUTED( + aggregationParams = AGGREGATION_PARAMS.copy(maxPartitionsContributed = 110_000_000), + exceptionMessage = + "maxPartitionsContributed must be less than 100000000 Provided values: maxPartitionsContributed=110000000", + ), PARTITION_LEVEL_CONTRIBUTION_BOUNDING_MAX_PARTITIONS_CONTRIBUTED( aggregationParams = AGGREGATION_PARAMS.copy( @@ -118,8 +129,33 @@ class DpFunctionsParamsTest { exceptionMessage = "preThreshold must be positive. Provided value: 0", ), NO_METRICS( - aggregationParams = AGGREGATION_PARAMS.copy(metrics = ImmutableList.of()), - exceptionMessage = "metrics must not be empty.", + aggregationParams = + AGGREGATION_PARAMS.copy( + nonFeatureMetrics = ImmutableList.of(), + features = ImmutableList.of(), + ), + exceptionMessage = "At least one of nonFeatureMetrics or features must be specified.", + ), + INVALID_NON_FEATURE_METRIC( + aggregationParams = + AGGREGATION_PARAMS.copy( + nonFeatureMetrics = ImmutableList.of(MetricDefinition(SUM)), + features = ImmutableList.of(), + ), + exceptionMessage = + "Only COUNT and PRIVACY_ID_COUNT are allowed in AggregationParams.nonFeatureMetrics. Other metrics should be provided via AggregationParams.features.", + ), + FEATURE_METRIC_IS_COUNT( + aggregationParams = + AGGREGATION_PARAMS.copy( + nonFeatureMetrics = ImmutableList.of(), + features = + ImmutableList.of( + createScalarFeatureSpec(nonFeatureMetrics = ImmutableList.of(MetricDefinition(COUNT))) + ), + ), + exceptionMessage = + "COUNT and PRIVACY_ID_COUNT are not allowed in features. They should be provided via AggregationParams.nonFeatureMetrics.", ), ZERO_MAX_CONTRIBUTIONS_PER_PARTITION( aggregationParams = AGGREGATION_PARAMS.copy(maxContributionsPerPartition = 0), @@ -180,90 +216,132 @@ class DpFunctionsParamsTest { "maxContributions and maxPartitionsContributed are mutually exclusive. Provided values: maxContributions=1, maxPartitionsContributed=1", ), MIN_VALUE_SET_MAX_VALUE_NOT_SET( - aggregationParams = AGGREGATION_PARAMS.copy(minValue = 1.0, maxValue = null), + aggregationParams = + AGGREGATION_PARAMS.copy( + features = ImmutableList.of(createScalarFeatureSpec(minValue = 1.0, maxValue = null)) + ), exceptionMessage = "minValue and maxValue must be simultaneously equal or not equal to null.", ), MIN_VALUE_NOT_SET_MAX_VALUE_SET( - aggregationParams = AGGREGATION_PARAMS.copy(minValue = null, maxValue = 2.0), + aggregationParams = + AGGREGATION_PARAMS.copy( + features = ImmutableList.of(createScalarFeatureSpec(minValue = null, maxValue = 2.0)) + ), exceptionMessage = "minValue and maxValue must be simultaneously equal or not equal to " + "null. Provided values: minValue=null, maxValue=2.0", ), MIN_VALUE_GREATER_THAN_MAX_VALUE( - aggregationParams = AGGREGATION_PARAMS.copy(minValue = 1.5, maxValue = 1.0), + aggregationParams = + AGGREGATION_PARAMS.copy( + features = ImmutableList.of(createScalarFeatureSpec(minValue = 1.5, maxValue = 1.0)) + ), exceptionMessage = "minValue must be less than maxValue. Provided values: " + "minValue=1.5, maxValue=1.0", ), MIN_VALUE_IS_EQUAL_TO_MAX_VALUE( - aggregationParams = AGGREGATION_PARAMS.copy(minValue = 1.5, maxValue = 1.5), + aggregationParams = + AGGREGATION_PARAMS.copy( + features = ImmutableList.of(createScalarFeatureSpec(minValue = 1.5, maxValue = 1.5)) + ), exceptionMessage = "minValue must be less than maxValue. Provided values: " + "minValue=1.5, maxValue=1.5", ), MIN_TOTAL_VALUE_SET_MAX_TOTAL_VALUE_NOT_SET( - aggregationParams = AGGREGATION_PARAMS.copy(minTotalValue = 1.0, maxTotalValue = null), + aggregationParams = + AGGREGATION_PARAMS.copy( + features = + ImmutableList.of(createScalarFeatureSpec(minTotalValue = 1.0, maxTotalValue = null)) + ), exceptionMessage = "minTotalValue and maxTotalValue must be simultaneously equal or not equal to null. " + "Provided values: minTotalValue=1.0, maxTotalValue=null", ), MIN_TOTAL_VALUE_NOT_SET_MAX_TOTAL_VALUE_SET( - aggregationParams = AGGREGATION_PARAMS.copy(minTotalValue = null, maxTotalValue = 2.0), + aggregationParams = + AGGREGATION_PARAMS.copy( + features = + ImmutableList.of(createScalarFeatureSpec(minTotalValue = null, maxTotalValue = 2.0)) + ), exceptionMessage = "minTotalValue and maxTotalValue must be simultaneously equal or not equal to null.", ), MIN_TOTAL_VALUE_GREATER_THAN_MAX_TOTAL_VALUE( - aggregationParams = AGGREGATION_PARAMS.copy(minTotalValue = 2.0, maxTotalValue = 0.0), + aggregationParams = + AGGREGATION_PARAMS.copy( + features = + ImmutableList.of(createScalarFeatureSpec(minTotalValue = 2.0, maxTotalValue = 0.0)) + ), exceptionMessage = "minTotalValue must be less or equal to maxTotalValue. Provided values: " + "minTotalValue=2.0, maxTotalValue=0.0", ), - MEAN_WITH_TOTAL_VALUE( + SCALAR_FEATURE_WITHOUT_BOUNDS( aggregationParams = AGGREGATION_PARAMS.copy( - metrics = ImmutableList.of(MetricDefinition(SUM), MetricDefinition(MEAN)), - minValue = 0.0, - maxValue = 3.0, - minTotalValue = 1.5, - maxTotalValue = 5.0, + nonFeatureMetrics = ImmutableList.of(), + features = + ImmutableList.of( + createScalarFeatureSpec( + nonFeatureMetrics = ImmutableList.of(MetricDefinition(SUM)), + minValue = null, + maxValue = null, + minTotalValue = null, + maxTotalValue = null, + ) + ), ), - exceptionMessage = - "(minTotalValue, maxTotalValue) should not be set if MEAN metric is requested", + exceptionMessage = "(minTotalValue, maxTotalValue) must be set for SUM metrics.", ), - VARIANCE_WITH_TOTAL_VALUE( + MEAN_WITH_TOTAL_VALUE( aggregationParams = AGGREGATION_PARAMS.copy( - metrics = ImmutableList.of(MetricDefinition(SUM), MetricDefinition(VARIANCE)), - minValue = 0.0, - maxValue = 3.0, - minTotalValue = 1.5, - maxTotalValue = 5.0, + features = + ImmutableList.of( + createScalarFeatureSpec( + nonFeatureMetrics = ImmutableList.of(MetricDefinition(SUM), MetricDefinition(MEAN)), + minValue = 0.0, + maxValue = 3.0, + minTotalValue = 1.5, + maxTotalValue = 5.0, + ) + ) ), exceptionMessage = - "(minTotalValue, maxTotalValue) should not be set if VARIANCE metric is requested", + "(minTotalValue, maxTotalValue) should not be set if MEAN metric is requested", ), - MAX_CONTRIBUTIONS_PER_PARTITION_MAX_CONTRIBUTIONS_NOT_SET_FOR_COUNT( + VARIANCE_WITH_TOTAL_VALUE( aggregationParams = AGGREGATION_PARAMS.copy( - metrics = ImmutableList.of(MetricDefinition(COUNT), MetricDefinition(SUM)), - maxContributionsPerPartition = null, - maxContributions = null, - minTotalValue = -1.0, - maxTotalValue = 1.0, - minValue = null, - maxValue = null, + features = + ImmutableList.of( + createScalarFeatureSpec( + nonFeatureMetrics = + ImmutableList.of(MetricDefinition(SUM), MetricDefinition(VARIANCE)), + minValue = 0.0, + maxValue = 3.0, + minTotalValue = 1.5, + maxTotalValue = 5.0, + ) + ) ), exceptionMessage = - "maxContributionsPerPartition or maxContributions must be set for COUNT metric.", + "(minTotalValue, maxTotalValue) should not be set if VARIANCE metric is requested", ), MAX_CONTRIBUTIONS_PER_PARTITION_MAX_CONTRIBUTIONS_NOT_SET_FOR_MEAN( aggregationParams = AGGREGATION_PARAMS.copy( - metrics = ImmutableList.of(MetricDefinition(MEAN), MetricDefinition(SUM)), + nonFeatureMetrics = ImmutableList.of(), + features = + ImmutableList.of( + createScalarFeatureSpec( + nonFeatureMetrics = ImmutableList.of(MetricDefinition(MEAN), MetricDefinition(SUM)), + minTotalValue = -1.0, + maxTotalValue = 1.0, + ) + ), maxContributionsPerPartition = null, maxContributions = null, - minTotalValue = -1.0, - maxTotalValue = 1.0, - minValue = null, - maxValue = null, ), exceptionMessage = "maxContributionsPerPartition or maxContributions must be set for MEAN metric.", @@ -271,44 +349,59 @@ class DpFunctionsParamsTest { MAX_CONTRIBUTIONS_PER_PARTITION_MAX_CONTRIBUTIONS_NOT_SET_FOR_QUANTILES( aggregationParams = AGGREGATION_PARAMS.copy( - metrics = + nonFeatureMetrics = ImmutableList.of(), + features = ImmutableList.of( - MetricDefinition(QUANTILES(ranks = ImmutableList.of())), - MetricDefinition(SUM), + createScalarFeatureSpec( + nonFeatureMetrics = + ImmutableList.of( + MetricDefinition(QUANTILES(ranks = ImmutableList.of())), + MetricDefinition(SUM), + ), + minTotalValue = -1.0, + maxTotalValue = 1.0, + ) ), maxContributionsPerPartition = null, - minTotalValue = -1.0, - maxTotalValue = 1.0, - minValue = null, - maxValue = null, ), exceptionMessage = "maxContributionsPerPartition must be set for QUANTILES metric.", ), MIN_TOTAL_VALUE_NOT_SET_FOR_SUM( aggregationParams = AGGREGATION_PARAMS.copy( - metrics = ImmutableList.of(MetricDefinition(SUM)), - minTotalValue = null, - maxTotalValue = null, + features = + ImmutableList.of( + createScalarFeatureSpec(nonFeatureMetrics = ImmutableList.of(MetricDefinition(SUM))) + ) ), exceptionMessage = "(minTotalValue, maxTotalValue) must be set for SUM metrics.", ), MIN_VALUE_NOT_SET_FOR_MEAN( aggregationParams = AGGREGATION_PARAMS.copy( - metrics = ImmutableList.of(MetricDefinition(MEAN), MetricDefinition(SUM)), - minTotalValue = 0.0, - maxTotalValue = 1.0, - minValue = null, - maxValue = null, + features = + ImmutableList.of( + createScalarFeatureSpec( + nonFeatureMetrics = ImmutableList.of(MetricDefinition(MEAN), MetricDefinition(SUM)), + minValue = null, + maxValue = null, + minTotalValue = 0.0, + maxTotalValue = 1.0, + ) + ) ), exceptionMessage = "(minValue, maxValue) must be set for MEAN metric.", ), VALUE_EXTRACTOR_NOT_SET_FOR_SUM_AND_MEAN( aggregationParams = AGGREGATION_PARAMS.copy( - metrics = - ImmutableList.of(MetricDefinition(COUNT), MetricDefinition(MEAN), MetricDefinition(SUM)) + nonFeatureMetrics = ImmutableList.of(MetricDefinition(COUNT)), + features = + ImmutableList.of( + createScalarFeatureSpec( + nonFeatureMetrics = ImmutableList.of(MetricDefinition(MEAN), MetricDefinition(SUM)) + ) + ), ), hasValueExtractor = false, exceptionMessage = "Metrics [MEAN, SUM] require a value extractor.", @@ -316,32 +409,49 @@ class DpFunctionsParamsTest { MIN_VALUE_NOT_SET_FOR_QUANTILES( aggregationParams = AGGREGATION_PARAMS.copy( - metrics = ImmutableList.of(MetricDefinition(QUANTILES(ranks = ImmutableList.of()))), - minTotalValue = 0.0, - maxTotalValue = 1.0, - minValue = null, - maxValue = null, + features = + ImmutableList.of( + createScalarFeatureSpec( + nonFeatureMetrics = + ImmutableList.of(MetricDefinition(QUANTILES(ranks = ImmutableList.of()))), + minValue = null, + maxValue = null, + minTotalValue = 0.0, + maxTotalValue = 1.0, + ) + ) ), exceptionMessage = "(minValue, maxValue) must be set for QUANTILES metric.", ), BUDGET_SPEC_SET_FOR_MEAN_AND_COUNT( aggregationParams = AGGREGATION_PARAMS.copy( - metrics = + nonFeatureMetrics = ImmutableList.of( - MetricDefinition(MEAN, RelativeBudgetPerOpSpec(weight = 1.0)), - MetricDefinition(COUNT, AbsoluteBudgetPerOpSpec(epsilon = 2.0, delta = 1e-12)), - ) + MetricDefinition(COUNT, AbsoluteBudgetPerOpSpec(epsilon = 2.0, delta = 1e-12)) + ), + features = + ImmutableList.of( + createScalarFeatureSpec( + nonFeatureMetrics = + ImmutableList.of(MetricDefinition(MEAN, RelativeBudgetPerOpSpec(weight = 1.0))) + ) + ), ), exceptionMessage = "BudgetPerOpSpec can not be set for both COUNT and MEAN metrics.", ), BUDGET_SPEC_SET_FOR_MEAN_AND_SUM( aggregationParams = AGGREGATION_PARAMS.copy( - metrics = + features = ImmutableList.of( - MetricDefinition(MEAN, RelativeBudgetPerOpSpec(weight = 1.0)), - MetricDefinition(SUM, RelativeBudgetPerOpSpec(weight = 2.0)), + createScalarFeatureSpec( + nonFeatureMetrics = + ImmutableList.of( + MetricDefinition(MEAN, RelativeBudgetPerOpSpec(weight = 1.0)), + MetricDefinition(SUM, RelativeBudgetPerOpSpec(weight = 2.0)), + ) + ) ) ), exceptionMessage = "BudgetPerOpSpec can not be set for both SUM and MEAN metrics.", @@ -349,13 +459,17 @@ class DpFunctionsParamsTest { MAX_CONTRIBUTIONS_PER_PARTITION_MAX_CONTRIBUTIONS_NOT_SET_FOR_VARIANCE( aggregationParams = AGGREGATION_PARAMS.copy( - metrics = ImmutableList.of(MetricDefinition(VARIANCE)), + nonFeatureMetrics = ImmutableList.of(), + features = + ImmutableList.of( + createScalarFeatureSpec( + nonFeatureMetrics = ImmutableList.of(MetricDefinition(VARIANCE)), + minTotalValue = -1.0, + maxTotalValue = 1.0, + ) + ), maxContributionsPerPartition = null, maxContributions = null, - minTotalValue = -1.0, - maxTotalValue = 1.0, - minValue = null, - maxValue = null, ), exceptionMessage = "maxContributionsPerPartition or maxContributions must be set for VARIANCE metric.", @@ -363,32 +477,31 @@ class DpFunctionsParamsTest { MIN_VALUE_NOT_SET_FOR_VARIANCE( aggregationParams = AGGREGATION_PARAMS.copy( - metrics = ImmutableList.of(MetricDefinition(VARIANCE)), - minTotalValue = 0.0, - maxTotalValue = 1.0, - minValue = null, - maxValue = null, - ), - exceptionMessage = "(minValue, maxValue) must be set for VARIANCE metric.", - ), - MAX_VALUE_NOT_SET_FOR_VARIANCE( - aggregationParams = - AGGREGATION_PARAMS.copy( - metrics = ImmutableList.of(MetricDefinition(VARIANCE)), - minTotalValue = -1.0, - maxTotalValue = 0.0, - minValue = null, - maxValue = null, + features = + ImmutableList.of( + createScalarFeatureSpec( + nonFeatureMetrics = ImmutableList.of(MetricDefinition(VARIANCE)), + minValue = null, + maxValue = null, + minTotalValue = 0.0, + maxTotalValue = 1.0, + ) + ) ), exceptionMessage = "(minValue, maxValue) must be set for VARIANCE metric.", ), BUDGET_SPEC_SET_FOR_VARIANCE_AND_MEAN( aggregationParams = AGGREGATION_PARAMS.copy( - metrics = + features = ImmutableList.of( - MetricDefinition(VARIANCE, RelativeBudgetPerOpSpec(weight = 1.0)), - MetricDefinition(MEAN, RelativeBudgetPerOpSpec(weight = 1.0)), + createScalarFeatureSpec( + nonFeatureMetrics = + ImmutableList.of( + MetricDefinition(VARIANCE, RelativeBudgetPerOpSpec(weight = 1.0)), + MetricDefinition(MEAN, RelativeBudgetPerOpSpec(weight = 1.0)), + ) + ) ) ), exceptionMessage = "BudgetPerOpSpec can not be set for both MEAN and VARIANCE metrics.", @@ -396,21 +509,34 @@ class DpFunctionsParamsTest { BUDGET_SPEC_SET_FOR_VARIANCE_AND_COUNT( aggregationParams = AGGREGATION_PARAMS.copy( - metrics = + nonFeatureMetrics = ImmutableList.of( - MetricDefinition(VARIANCE, RelativeBudgetPerOpSpec(weight = 1.0)), - MetricDefinition(COUNT, AbsoluteBudgetPerOpSpec(epsilon = 2.0, delta = 1e-12)), - ) + MetricDefinition(COUNT, AbsoluteBudgetPerOpSpec(epsilon = 2.0, delta = 1e-12)) + ), + features = + ImmutableList.of( + createScalarFeatureSpec( + nonFeatureMetrics = + ImmutableList.of( + MetricDefinition(VARIANCE, RelativeBudgetPerOpSpec(weight = 1.0)) + ) + ) + ), ), exceptionMessage = "BudgetPerOpSpec can not be set for both COUNT and VARIANCE metrics.", ), BUDGET_SPEC_SET_FOR_VARIANCE_AND_SUM( aggregationParams = AGGREGATION_PARAMS.copy( - metrics = + features = ImmutableList.of( - MetricDefinition(VARIANCE, RelativeBudgetPerOpSpec(weight = 1.0)), - MetricDefinition(SUM, RelativeBudgetPerOpSpec(weight = 2.0)), + createScalarFeatureSpec( + nonFeatureMetrics = + ImmutableList.of( + MetricDefinition(VARIANCE, RelativeBudgetPerOpSpec(weight = 1.0)), + MetricDefinition(SUM, RelativeBudgetPerOpSpec(weight = 2.0)), + ) + ) ) ), exceptionMessage = "BudgetPerOpSpec can not be set for both SUM and VARIANCE metrics.", @@ -426,7 +552,7 @@ class DpFunctionsParamsTest { DUPLICATE_METRIC_TYPES( aggregationParams = AGGREGATION_PARAMS.copy( - metrics = + nonFeatureMetrics = ImmutableList.of( MetricDefinition(COUNT), MetricDefinition(PRIVACY_ID_COUNT), @@ -434,78 +560,79 @@ class DpFunctionsParamsTest { ) ), exceptionMessage = - "metrics must not contain duplicate metric types. Provided " + + "nonFeatureMetrics must not contain duplicate metric types. Provided " + "[COUNT, PRIVACY_ID_COUNT, COUNT].", ), - NORM_KIND_NOT_SET_FOR_VECTOR_SUM( + DUPLICATE_METRIC_TYPES_IN_FEATURE( aggregationParams = AGGREGATION_PARAMS.copy( - metrics = ImmutableList.of(MetricDefinition(VECTOR_SUM)), - vectorNormKind = null, - vectorMaxTotalNorm = 2.3, - vectorSize = 2, + nonFeatureMetrics = ImmutableList.of(), + features = + ImmutableList.of( + createScalarFeatureSpec( + featureId = "feature1", + nonFeatureMetrics = ImmutableList.of(MetricDefinition(SUM), MetricDefinition(SUM)), + minTotalValue = 1.0, + maxTotalValue = 2.0, + ) + ), ), - exceptionMessage = "vectorNormKind must be set for VECTOR_SUM metric.", + exceptionMessage = + "feature feature1 must not contain duplicate metric types. Provided [SUM, SUM]", ), - L2_NORM_KIND_WHEN_LAPLACE_NOISE_IS_USED( + DUPLICATE_FEATURE_ID( aggregationParams = AGGREGATION_PARAMS.copy( - metrics = ImmutableList.of(MetricDefinition(VECTOR_SUM)), - noiseKind = NoiseKind.LAPLACE, - vectorNormKind = NormKind.L2, - vectorMaxTotalNorm = 2.3, - vectorSize = 2, + nonFeatureMetrics = ImmutableList.of(), + features = + ImmutableList.of( + createScalarFeatureSpec( + featureId = "feature1", + nonFeatureMetrics = ImmutableList.of(MetricDefinition(SUM)), + minTotalValue = 1.0, + maxTotalValue = 2.0, + ), + createScalarFeatureSpec( + featureId = "feature1", + nonFeatureMetrics = ImmutableList.of(MetricDefinition(MEAN)), + minValue = 1.0, + maxValue = 2.0, + ), + ), ), - exceptionMessage = "vectorNormKind must be L_INF or L1 for LAPLACE noise.", + exceptionMessage = "featureId must be unique. Provided [feature1, feature1]", ), - L1_NORM_KIND_WHEN_LAPLACE_NOISE_IS_USED( + L2_NORM_KIND_WHEN_LAPLACE_NOISE_IS_USED( aggregationParams = AGGREGATION_PARAMS.copy( - metrics = ImmutableList.of(MetricDefinition(VECTOR_SUM)), - noiseKind = NoiseKind.GAUSSIAN, - vectorNormKind = NormKind.L1, - vectorMaxTotalNorm = 2.3, - vectorSize = 2, + noiseKind = NoiseKind.LAPLACE, + features = + ImmutableList.of( + createVectorFeatureSpec( + nonFeatureMetrics = ImmutableList.of(MetricDefinition(VECTOR_SUM)), + normKind = NormKind.L2, + vectorMaxTotalNorm = 2.3, + vectorSize = 2, + ) + ), ), - exceptionMessage = "vectorNormKind must be L_INF or L2 for GAUSSIAN noise.", + exceptionMessage = "vectorNormKind must be L_INF or L1 for LAPLACE noise.", ), - MAX_TOTAL_NORM_NOT_SET_FOR_VECTOR_SUM( + L1_NORM_KIND_WHEN_GAUSSIAN_NOISE_IS_USED( aggregationParams = AGGREGATION_PARAMS.copy( - metrics = ImmutableList.of(MetricDefinition(VECTOR_SUM)), noiseKind = NoiseKind.GAUSSIAN, - vectorNormKind = NormKind.L2, - vectorMaxTotalNorm = null, - vectorSize = 2, - ), - exceptionMessage = "vectorMaxTotalNorm must be set for VECTOR_SUM metric.", - ), - VECTOR_SIZE_NOT_SET_FOR_VECTOR_SUM( - aggregationParams = - AGGREGATION_PARAMS.copy( - metrics = ImmutableList.of(MetricDefinition(VECTOR_SUM)), - vectorNormKind = NormKind.L_INF, - vectorMaxTotalNorm = 1.0, - vectorSize = null, - ), - exceptionMessage = "vectorSize must be set for VECTOR_SUM metric.", - ), - VECTOR_SUM_IS_REQUESTED_TOGETHER_WITH_SCALAR_METRICS( - aggregationParams = - AGGREGATION_PARAMS.copy( - metrics = + features = ImmutableList.of( - MetricDefinition(VECTOR_SUM), - MetricDefinition(SUM), - MetricDefinition(MEAN), - MetricDefinition(VARIANCE), + createVectorFeatureSpec( + nonFeatureMetrics = ImmutableList.of(MetricDefinition(VECTOR_SUM)), + normKind = NormKind.L1, + vectorMaxTotalNorm = 2.3, + vectorSize = 2, + ) ), - vectorNormKind = NormKind.L_INF, - vectorMaxTotalNorm = 1.0, - vectorSize = 3, ), - exceptionMessage = - "VECTOR_SUM can not be computed together with scalar metrics such as SUM, MEAN, VARIANCE and QUANTILES.", + exceptionMessage = "vectorNormKind must be L_INF or L2 for GAUSSIAN noise.", ), } @@ -593,13 +720,13 @@ class DpFunctionsParamsTest { companion object { val AGGREGATION_PARAMS = AggregationParams( - metrics = ImmutableList.of(MetricDefinition(COUNT), MetricDefinition(PRIVACY_ID_COUNT)), + nonFeatureMetrics = + ImmutableList.of(MetricDefinition(COUNT), MetricDefinition(PRIVACY_ID_COUNT)), noiseKind = NoiseKind.LAPLACE, maxPartitionsContributed = 1, maxContributionsPerPartition = 1, maxContributions = null, - minValue = 0.0, - maxValue = 1.0, + features = ImmutableList.of(createScalarFeatureSpec(minValue = 0.0, maxValue = 1.0)), ) val SELECT_PARTITIONS_PARAMS = @@ -608,5 +735,30 @@ class DpFunctionsParamsTest { budget = AbsoluteBudgetPerOpSpec(epsilon = 1.0, delta = 1e-12), preThreshold = 10, ) + + private fun createScalarFeatureSpec( + featureId: String = "value", + nonFeatureMetrics: ImmutableList = ImmutableList.of(), + minValue: Double? = 0.0, + maxValue: Double? = 1.0, + minTotalValue: Double? = null, + maxTotalValue: Double? = null, + ) = + ScalarFeatureSpec( + featureId, + nonFeatureMetrics, + minValue, + maxValue, + minTotalValue, + maxTotalValue, + ) + + private fun createVectorFeatureSpec( + featureId: String = "value", + nonFeatureMetrics: ImmutableList = ImmutableList.of(), + vectorSize: Int = 1, + normKind: NormKind = NormKind.L1, + vectorMaxTotalNorm: Double = 1.0, + ) = VectorFeatureSpec(featureId, nonFeatureMetrics, vectorSize, normKind, vectorMaxTotalNorm) } } diff --git a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/EndToEndTest.kt b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/EndToEndTest.kt index 62e67922..32eccd7b 100644 --- a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/EndToEndTest.kt +++ b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/EndToEndTest.kt @@ -75,13 +75,19 @@ class EndToEndTest { // Use low bounds to avoid sensitivity overflow when adding noise. val params = AggregationParams( - metrics = - ImmutableList.of(MetricDefinition(COUNT), MetricDefinition(SUM), MetricDefinition(MEAN)), + nonFeatureMetrics = ImmutableList.of(MetricDefinition(COUNT)), + features = + ImmutableList.of( + ScalarFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(SUM), MetricDefinition(MEAN)), + minValue = -2.0, + maxValue = 2.0, + ) + ), noiseKind = LAPLACE, maxPartitionsContributed = 1, maxContributionsPerPartition = 1, - minValue = -2.0, - maxValue = 2.0, ) val dpAggregates = @@ -104,12 +110,20 @@ class EndToEndTest { // Use low bounds to avoid sensitivity overflow when adding noise. val params = AggregationParams( - metrics = ImmutableList.of(MetricDefinition(COUNT), MetricDefinition(PRIVACY_ID_COUNT)), + nonFeatureMetrics = + ImmutableList.of(MetricDefinition(COUNT), MetricDefinition(PRIVACY_ID_COUNT)), + features = + ImmutableList.of( + ScalarFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(), + minValue = -2.0, + maxValue = 2.0, + ) + ), noiseKind = LAPLACE, maxPartitionsContributed = 1, maxContributionsPerPartition = 1, - minValue = -2.0, - maxValue = 2.0, ) val dpAggregates = @@ -146,7 +160,7 @@ class EndToEndTest { ) val params = AggregationParams( - metrics = ImmutableList.of(MetricDefinition(COUNT)), + nonFeatureMetrics = ImmutableList.of(MetricDefinition(COUNT)), noiseKind = LAPLACE, // Contribution bounding would be applied if it was not disabled. maxPartitionsContributed = 1, @@ -190,7 +204,7 @@ class EndToEndTest { ) val params = AggregationParams( - metrics = ImmutableList.of(MetricDefinition(COUNT)), + nonFeatureMetrics = ImmutableList.of(MetricDefinition(COUNT)), noiseKind = LAPLACE, maxPartitionsContributed = 2, // Contributions to each of the two partitions are kept. maxContributionsPerPartition = 1, // Double contributions per partition are removed. @@ -218,13 +232,19 @@ class EndToEndTest { // Use low bounds to avoid sensitivity overflow when adding noise. val params = AggregationParams( - metrics = - ImmutableList.of(MetricDefinition(COUNT), MetricDefinition(SUM), MetricDefinition(MEAN)), + nonFeatureMetrics = ImmutableList.of(MetricDefinition(COUNT)), + features = + ImmutableList.of( + ScalarFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(SUM), MetricDefinition(MEAN)), + minValue = -2.0, + maxValue = 2.0, + ) + ), noiseKind = LAPLACE, maxPartitionsContributed = 1, maxContributionsPerPartition = 1, - minValue = -2.0, - maxValue = 2.0, ) val dpAggregates = @@ -247,17 +267,23 @@ class EndToEndTest { // Use low bounds to avoid sensitivity overflow when adding noise. val params = AggregationParams( - metrics = + nonFeatureMetrics = ImmutableList.of( MetricDefinition(COUNT, AbsoluteBudgetPerOpSpec(0.1, 1e-5)), - MetricDefinition(SUM, AbsoluteBudgetPerOpSpec(0.1, 1e-5)), MetricDefinition(PRIVACY_ID_COUNT, AbsoluteBudgetPerOpSpec(0.1, 1e-5)), ), + features = + ImmutableList.of( + ScalarFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(SUM, AbsoluteBudgetPerOpSpec(0.1, 1e-5))), + minTotalValue = -5.0, + maxTotalValue = 5.0, + ) + ), noiseKind = GAUSSIAN, maxPartitionsContributed = 5, maxContributionsPerPartition = 5, - minTotalValue = -5.0, - maxTotalValue = 5.0, ) val dpAggregates = diff --git a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/MeanCombinerTest.kt b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/MeanCombinerTest.kt index 1e566584..270fe6f5 100644 --- a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/MeanCombinerTest.kt +++ b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/MeanCombinerTest.kt @@ -42,12 +42,14 @@ class MeanCombinerTest { companion object { private val AGG_PARAMS = AggregationParams( - metrics = ImmutableList.of(MetricDefinition(MEAN)), + nonFeatureMetrics = ImmutableList.of(), + features = + ImmutableList.of( + ScalarFeatureSpec("value", ImmutableList.of(MetricDefinition(MEAN)), -10.0, 10.0) + ), noiseKind = NoiseKind.GAUSSIAN, maxPartitionsContributed = 3, maxContributionsPerPartition = 5, - minValue = -10.0, - maxValue = 10.0, ) private val noiseMock: Noise = mock() @@ -70,6 +72,7 @@ class MeanCombinerTest { UNUSED_ALLOCATED_BUDGET, NoiseFactory(), ExecutionMode.PRODUCTION, + AGG_PARAMS.features[0] as ScalarFeatureSpec, ) val accumulator = combiner.emptyAccumulator() @@ -85,13 +88,17 @@ class MeanCombinerTest { @Test fun createAccumulator_doesNotClampContributionsWithinBounds() { + val featureSpec = + (AGG_PARAMS.features[0] as ScalarFeatureSpec).copy(minValue = -10.0, maxValue = 10.0) + val params = AGG_PARAMS.copy(features = ImmutableList.of(featureSpec)) val combiner = MeanCombiner( - AGG_PARAMS.copy(minValue = -10.0, maxValue = 10.0), + params, UNUSED_ALLOCATED_BUDGET, UNUSED_ALLOCATED_BUDGET, NoiseFactory(), ExecutionMode.PRODUCTION, + featureSpec, ) val accumulator = @@ -108,13 +115,17 @@ class MeanCombinerTest { @Test fun createAccumulator_privacyLevelWithContributionBounding_clampsValues() { + val featureSpec = + (AGG_PARAMS.features[0] as ScalarFeatureSpec).copy(minValue = -10.0, maxValue = 10.0) + val params = AGG_PARAMS.copy(features = ImmutableList.of(featureSpec)) val combiner = MeanCombiner( - AGG_PARAMS.copy(minValue = -10.0, maxValue = 10.0), + params, UNUSED_ALLOCATED_BUDGET, UNUSED_ALLOCATED_BUDGET, NoiseFactory(), ExecutionMode.PRODUCTION, + featureSpec, ) val accumulator = @@ -133,13 +144,17 @@ class MeanCombinerTest { @Test fun createAccumulator_fullTestMode_doesNotClampValues() { + val featureSpec = + (AGG_PARAMS.features[0] as ScalarFeatureSpec).copy(minValue = -10.0, maxValue = 10.0) + val params = AGG_PARAMS.copy(features = ImmutableList.of(featureSpec)) val combiner = MeanCombiner( - AGG_PARAMS.copy(minValue = -10.0, maxValue = 10.0), + params, UNUSED_ALLOCATED_BUDGET, UNUSED_ALLOCATED_BUDGET, NoiseFactory(), FULL_TEST_MODE, + featureSpec, ) val accumulator = @@ -158,13 +173,19 @@ class MeanCombinerTest { @Test fun createAccumulator_normalizesSum() { + val params = + AGG_PARAMS.copy( + features = + ImmutableList.of((AGG_PARAMS.features[0] as ScalarFeatureSpec).copy(minValue = 5.0)) + ) val combiner = MeanCombiner( - AGG_PARAMS.copy(minValue = 5.0), + params, UNUSED_ALLOCATED_BUDGET, UNUSED_ALLOCATED_BUDGET, NoiseFactory(), ExecutionMode.PRODUCTION, + params.features[0] as ScalarFeatureSpec, ) val accumulator = @@ -181,13 +202,21 @@ class MeanCombinerTest { @Test fun createAccumulator_normalizationAndClamping() { + val params = + AGG_PARAMS.copy( + features = + ImmutableList.of( + (AGG_PARAMS.features[0] as ScalarFeatureSpec).copy(minValue = 5.0, maxValue = 10.0) + ) + ) val combiner = MeanCombiner( - AGG_PARAMS.copy(minValue = 5.0, maxValue = 10.0), + params, UNUSED_ALLOCATED_BUDGET, UNUSED_ALLOCATED_BUDGET, NoiseFactory(), ExecutionMode.PRODUCTION, + params.features[0] as ScalarFeatureSpec, ) val accumulator = @@ -206,13 +235,19 @@ class MeanCombinerTest { @Test fun createAccumulator_aggregatesMultipleElements() { + val params = + AGG_PARAMS.copy( + features = + ImmutableList.of((AGG_PARAMS.features[0] as ScalarFeatureSpec).copy(minValue = 4.0)) + ) val combiner = MeanCombiner( - AGG_PARAMS.copy(minValue = 4.0), + params, UNUSED_ALLOCATED_BUDGET, UNUSED_ALLOCATED_BUDGET, NoiseFactory(), ExecutionMode.PRODUCTION, + params.features[0] as ScalarFeatureSpec, ) val accumulator = @@ -238,6 +273,7 @@ class MeanCombinerTest { UNUSED_ALLOCATED_BUDGET, NoiseFactory(), ExecutionMode.PRODUCTION, + AGG_PARAMS.features[0] as ScalarFeatureSpec, ) val accumulator = @@ -267,19 +303,27 @@ class MeanCombinerTest { countBudget.initialize(2.0, 1e-5) val sumBudget = AllocatedBudget() sumBudget.initialize(1.0, 1e-3) + val params = + AGG_PARAMS.copy( + features = + ImmutableList.of( + (AGG_PARAMS.features[0] as ScalarFeatureSpec).copy( + metrics = ImmutableList.of(MetricDefinition(MEAN)), + minValue = 4.0, + maxValue = 10.0, + ) + ), + maxPartitionsContributed = 5, + maxContributionsPerPartition = 7, + ) val combiner = MeanCombiner( - AGG_PARAMS.copy( - metrics = ImmutableList.of(MetricDefinition(MEAN)), - maxPartitionsContributed = 5, - maxContributionsPerPartition = 7, - minValue = 4.0, - maxValue = 10.0, - ), + params, countBudget, sumBudget, noiseFactoryMock, ExecutionMode.PRODUCTION, + params.features[0] as ScalarFeatureSpec, ) val accumulator = meanAccumulator { count = 10 @@ -316,25 +360,29 @@ class MeanCombinerTest { val sumBudget = AllocatedBudget() sumBudget.initialize(10000.0, 0.0) + val params = + AGG_PARAMS.copy( + nonFeatureMetrics = ImmutableList.of(MetricDefinition(COUNT)), + features = + ImmutableList.of( + (AGG_PARAMS.features[0] as ScalarFeatureSpec).copy( + metrics = ImmutableList.of(MetricDefinition(MEAN), MetricDefinition(SUM)), + minValue = 4.0, + maxValue = 12.0, + ) + ), + maxPartitionsContributed = 5, + maxContributionsPerPartition = 7, + noiseKind = NoiseKind.LAPLACE, + ) val combiner = MeanCombiner( - AGG_PARAMS.copy( - metrics = - ImmutableList.of( - MetricDefinition(MEAN), - MetricDefinition(SUM), - MetricDefinition(COUNT), - ), - maxPartitionsContributed = 5, - maxContributionsPerPartition = 7, - minValue = 4.0, - maxValue = 12.0, - noiseKind = NoiseKind.LAPLACE, - ), + params, countBudget, sumBudget, NoiseFactory(), ExecutionMode.PRODUCTION, + params.features[0] as ScalarFeatureSpec, ) val accumulator = meanAccumulator { @@ -353,31 +401,15 @@ class MeanCombinerTest { assertThat(result.mean).isWithin(1e-9).of(result.sum!! / result.count!!) } - enum class ReturnedMetricsTestCase( - val requestedMetrics: ImmutableList, - val countExpected: Boolean, - val sumExpected: Boolean, - ) { - NO_SUM_NO_COUNT( - requestedMetrics = ImmutableList.of(MetricDefinition(MEAN)), - countExpected = false, - sumExpected = false, - ), - ONLY_SUM( - requestedMetrics = ImmutableList.of(MetricDefinition(MEAN), MetricDefinition(SUM)), - countExpected = false, - sumExpected = true, - ), + enum class ReturnedMetricsTestCase(val requestedMetrics: ImmutableList) { + NO_SUM_NO_COUNT(requestedMetrics = ImmutableList.of(MetricDefinition(MEAN))), + ONLY_SUM(requestedMetrics = ImmutableList.of(MetricDefinition(MEAN), MetricDefinition(SUM))), ONLY_COUNT( - requestedMetrics = ImmutableList.of(MetricDefinition(MEAN), MetricDefinition(COUNT)), - countExpected = true, - sumExpected = false, + requestedMetrics = ImmutableList.of(MetricDefinition(MEAN), MetricDefinition(COUNT)) ), COUNT_AND_SUM( requestedMetrics = - ImmutableList.of(MetricDefinition(MEAN), MetricDefinition(SUM), MetricDefinition(COUNT)), - countExpected = true, - sumExpected = true, + ImmutableList.of(MetricDefinition(MEAN), MetricDefinition(SUM), MetricDefinition(COUNT)) ), } @@ -385,13 +417,26 @@ class MeanCombinerTest { fun aggregate_computeMetrics_checkWhichMetricReturned( @TestParameter testCase: ReturnedMetricsTestCase ) { + val featureMetrics = testCase.requestedMetrics.filter { it.type == MEAN || it.type == SUM } + val nonFeatureMetrics = testCase.requestedMetrics.filter { it.type == COUNT } + val params = + AGG_PARAMS.copy( + nonFeatureMetrics = ImmutableList.copyOf(nonFeatureMetrics), + features = + ImmutableList.of( + (AGG_PARAMS.features[0] as ScalarFeatureSpec).copy( + metrics = ImmutableList.copyOf(featureMetrics) + ) + ), + ) val combiner = MeanCombiner( - AGG_PARAMS.copy(metrics = testCase.requestedMetrics), + params, UNUSED_ALLOCATED_BUDGET, UNUSED_ALLOCATED_BUDGET, NoiseFactory(), ExecutionMode.PRODUCTION, + params.features[0] as ScalarFeatureSpec, ) val metrics = @@ -401,13 +446,13 @@ class MeanCombinerTest { normalizedSum = 120.0 } ) - if (testCase.countExpected) { + if (testCase.requestedMetrics.any { it.type == COUNT }) { assertThat(metrics.count).isNotNull() } else { assertThat(metrics.count).isNull() } - if (testCase.sumExpected) { + if (testCase.requestedMetrics.any { it.type == SUM }) { assertThat(metrics.sum).isNotNull() } else { assertThat(metrics.sum).isNull() @@ -416,13 +461,17 @@ class MeanCombinerTest { @Test fun computeMetrics_withoutNoise_withMultipleContributionsIncludingEmptyAccumulator_returnsCorrectResult() { + val featureSpec = + (AGG_PARAMS.features[0] as ScalarFeatureSpec).copy(minValue = -10.0, maxValue = 10.0) + val params = AGG_PARAMS.copy(features = ImmutableList.of(featureSpec)) val combiner = MeanCombiner( - AGG_PARAMS.copy(minValue = -10.0, maxValue = 10.0), + params, UNUSED_ALLOCATED_BUDGET, UNUSED_ALLOCATED_BUDGET, ZeroNoiseFactory(), ExecutionMode.PRODUCTION, + featureSpec, ) val accumulator0 = combiner.emptyAccumulator() @@ -441,17 +490,26 @@ class MeanCombinerTest { @Test fun computeMetrics_withoutNoise_onlyEmptyAccumulator_returnsZeroCountAndNaNForSumAndMean() { + val params = + AGG_PARAMS.copy( + nonFeatureMetrics = ImmutableList.of(MetricDefinition(COUNT)), + features = + ImmutableList.of( + (AGG_PARAMS.features[0] as ScalarFeatureSpec).copy( + metrics = ImmutableList.of(MetricDefinition(MEAN), MetricDefinition(SUM)), + minValue = 4.0, + maxValue = 10.0, + ) + ), + ) val combiner = MeanCombiner( - AGG_PARAMS.copy( - ImmutableList.of(MetricDefinition(MEAN), MetricDefinition(SUM), MetricDefinition(COUNT)), - minValue = 4.0, - maxValue = 10.0, - ), + params, UNUSED_ALLOCATED_BUDGET, UNUSED_ALLOCATED_BUDGET, ZeroNoiseFactory(), ExecutionMode.PRODUCTION, + params.features[0] as ScalarFeatureSpec, ) val result = combiner.computeMetrics(combiner.emptyAccumulator()) diff --git a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/PartitionAndPerPartitionSamplerTest.kt b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/PartitionAndPerPartitionSamplerTest.kt index 0224a928..eec1018d 100644 --- a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/PartitionAndPerPartitionSamplerTest.kt +++ b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/PartitionAndPerPartitionSamplerTest.kt @@ -35,12 +35,14 @@ import org.junit.runners.JUnit4 class PartitionAndPerPartitionSamplerTest { val AGG_PARAMS = AggregationParams( - metrics = ImmutableList.of(MetricDefinition(MEAN)), + nonFeatureMetrics = ImmutableList.of(), + features = + ImmutableList.of( + ScalarFeatureSpec("value", ImmutableList.of(MetricDefinition(MEAN)), -1.0, 1.0) + ), noiseKind = GAUSSIAN, maxPartitionsContributed = MAX_VALUE, maxContributionsPerPartition = MAX_VALUE, - minValue = -1.0, - maxValue = 1.0, ) @Test diff --git a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/PerPartitionContributionsSamplerTest.kt b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/PerPartitionContributionsSamplerTest.kt index cc45e02e..a222a5a1 100644 --- a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/PerPartitionContributionsSamplerTest.kt +++ b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/PerPartitionContributionsSamplerTest.kt @@ -203,7 +203,7 @@ class PerPartitionContributionsSamplerTest { private companion object { val aggParams = AggregationParams( - metrics = ImmutableList.of(MetricDefinition(MEAN)), + nonFeatureMetrics = ImmutableList.of(MetricDefinition(MEAN)), noiseKind = GAUSSIAN, maxPartitionsContributed = 1, maxContributionsPerPartition = 2, diff --git a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/PostAggregationPartitionSelectionCombinerTest.kt b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/PostAggregationPartitionSelectionCombinerTest.kt index 12f0b745..bf678834 100644 --- a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/PostAggregationPartitionSelectionCombinerTest.kt +++ b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/PostAggregationPartitionSelectionCombinerTest.kt @@ -200,7 +200,7 @@ class PostAggregationPartitionSelectionCombinerTest { companion object { private val AGGREGATION_PARAMS = AggregationParams( - metrics = ImmutableList.of(MetricDefinition(MetricType.PRIVACY_ID_COUNT)), + nonFeatureMetrics = ImmutableList.of(MetricDefinition(MetricType.PRIVACY_ID_COUNT)), noiseKind = GAUSSIAN, maxPartitionsContributed = 3, maxContributionsPerPartition = 5, diff --git a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/PrivacyIdCombinerTest.kt b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/PrivacyIdCombinerTest.kt index 2b70c0f7..2cda72f8 100644 --- a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/PrivacyIdCombinerTest.kt +++ b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/PrivacyIdCombinerTest.kt @@ -40,7 +40,7 @@ class PrivacyIdCombinerTest { companion object { private val AGG_PARAMS = AggregationParams( - metrics = ImmutableList.of(MetricDefinition(PRIVACY_ID_COUNT)), + nonFeatureMetrics = ImmutableList.of(MetricDefinition(PRIVACY_ID_COUNT)), noiseKind = GAUSSIAN, maxPartitionsContributed = 3, maxContributionsPerPartition = 5, diff --git a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/PrivatePartitionsComputationalGraphTest.kt b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/PrivatePartitionsComputationalGraphTest.kt index fd27f4ed..cf200d5c 100644 --- a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/PrivatePartitionsComputationalGraphTest.kt +++ b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/PrivatePartitionsComputationalGraphTest.kt @@ -294,24 +294,27 @@ class PrivatePartitionsComputationalGraphTest { private companion object { val PRIVACY_ID_COUNT_PARAMS = AggregationParams( - metrics = ImmutableList.of(MetricDefinition(PRIVACY_ID_COUNT)), + nonFeatureMetrics = ImmutableList.of(MetricDefinition(PRIVACY_ID_COUNT)), noiseKind = GAUSSIAN, maxPartitionsContributed = 10, maxContributionsPerPartition = 5, ) val COUNT_SUM_AND_ID_COUNT_PARAMS = AggregationParams( - metrics = + nonFeatureMetrics = + ImmutableList.of(MetricDefinition(COUNT), MetricDefinition(PRIVACY_ID_COUNT)), + features = ImmutableList.of( - MetricDefinition(COUNT), - MetricDefinition(SUM), - MetricDefinition(PRIVACY_ID_COUNT), + ScalarFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(SUM)), + minTotalValue = -100.0, + maxTotalValue = 100.0, + ) ), noiseKind = GAUSSIAN, maxPartitionsContributed = 100, maxContributionsPerPartition = 100, - minTotalValue = -100.0, - maxTotalValue = 100.0, ) val METRICS_ALLOCATED_BUDGET = AllocatedBudget().apply { initialize(1.1, 1e-3) } // High epsilon/delta for partition selection. Partitions with ~10 privacy unit have ~1 @@ -334,6 +337,7 @@ class PrivatePartitionsComputationalGraphTest { METRICS_ALLOCATED_BUDGET, ZeroNoiseFactory(), ExecutionMode.PRODUCTION, + COUNT_SUM_AND_ID_COUNT_PARAMS.features[0] as ScalarFeatureSpec, ), PrivacyIdCountCombiner( COUNT_SUM_AND_ID_COUNT_PARAMS, diff --git a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/PrivatePartitionsTest.kt b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/PrivatePartitionsTest.kt index e272bf7e..7d5cafb0 100644 --- a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/PrivatePartitionsTest.kt +++ b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/PrivatePartitionsTest.kt @@ -39,7 +39,7 @@ class PrivatePartitionsTest { companion object { private val AGG_PARAMS = AggregationParams( - metrics = ImmutableList.of(MetricDefinition(PRIVACY_ID_COUNT)), + nonFeatureMetrics = ImmutableList.of(MetricDefinition(PRIVACY_ID_COUNT)), noiseKind = GAUSSIAN, maxPartitionsContributed = 3, maxContributionsPerPartition = 5, diff --git a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/PublicPartitionsComputationalGraphTest.kt b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/PublicPartitionsComputationalGraphTest.kt index 740a725f..87c4733b 100644 --- a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/PublicPartitionsComputationalGraphTest.kt +++ b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/PublicPartitionsComputationalGraphTest.kt @@ -41,38 +41,48 @@ class PublicPartitionsComputationalGraphTest { companion object { private val COUNT_PARAMS = AggregationParams( - metrics = ImmutableList.of(MetricDefinition(COUNT)), + nonFeatureMetrics = ImmutableList.of(MetricDefinition(COUNT)), noiseKind = GAUSSIAN, maxPartitionsContributed = Int.MAX_VALUE, maxContributionsPerPartition = Int.MAX_VALUE, ) private val PRIVACY_ID_COUNT_PARAMS = AggregationParams( - metrics = ImmutableList.of(MetricDefinition(PRIVACY_ID_COUNT)), + nonFeatureMetrics = ImmutableList.of(MetricDefinition(PRIVACY_ID_COUNT)), noiseKind = GAUSSIAN, maxPartitionsContributed = Int.MAX_VALUE, ) private val SUM_PARAMS = AggregationParams( - metrics = ImmutableList.of(MetricDefinition(SUM)), + nonFeatureMetrics = ImmutableList.of(), + features = + ImmutableList.of( + ScalarFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(SUM)), + minTotalValue = -Double.MAX_VALUE, + maxTotalValue = Double.MAX_VALUE, + ) + ), noiseKind = GAUSSIAN, maxPartitionsContributed = Int.MAX_VALUE, - minTotalValue = -Double.MAX_VALUE, - maxTotalValue = Double.MAX_VALUE, ) private val COUNT_SUM_AND_ID_COUNT_PARAMS = AggregationParams( - metrics = + nonFeatureMetrics = + ImmutableList.of(MetricDefinition(COUNT), MetricDefinition(PRIVACY_ID_COUNT)), + features = ImmutableList.of( - MetricDefinition(COUNT), - MetricDefinition(SUM), - MetricDefinition(PRIVACY_ID_COUNT), + ScalarFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(SUM)), + minTotalValue = -100.0, + maxTotalValue = 100.0, + ) ), noiseKind = GAUSSIAN, maxPartitionsContributed = 100, maxContributionsPerPartition = 100, - minTotalValue = -100.0, - maxTotalValue = 100.0, ) private val ALLOCATED_BUDGET = AllocatedBudget() @@ -95,6 +105,7 @@ class PublicPartitionsComputationalGraphTest { ALLOCATED_BUDGET, ZeroNoiseFactory(), ExecutionMode.PRODUCTION, + COUNT_SUM_AND_ID_COUNT_PARAMS.features[0] as ScalarFeatureSpec, ), PrivacyIdCountCombiner( COUNT_SUM_AND_ID_COUNT_PARAMS, @@ -196,6 +207,7 @@ class PublicPartitionsComputationalGraphTest { ALLOCATED_BUDGET, ZeroNoiseFactory(), ExecutionMode.PRODUCTION, + SUM_PARAMS.features[0] as ScalarFeatureSpec, ) ) ), diff --git a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/QuantilesCombinerTest.kt b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/QuantilesCombinerTest.kt index a810ef35..9aa0b638 100644 --- a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/QuantilesCombinerTest.kt +++ b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/QuantilesCombinerTest.kt @@ -32,12 +32,12 @@ import org.junit.runner.RunWith class QuantilesCombinerTest { private fun defaultQuantilesAggParams() = AggregationParams( - metrics = ImmutableList.of(), + nonFeatureMetrics = ImmutableList.of(), + features = + ImmutableList.of(ScalarFeatureSpec("value", ImmutableList.of(), -10000.0, 10000.0)), noiseKind = GAUSSIAN, maxPartitionsContributed = 1, maxContributionsPerPartition = 1, - minValue = -10000.0, - maxValue = 10000.0, ) @Test @@ -51,6 +51,7 @@ class QuantilesCombinerTest { allocatedBudget, ZeroNoiseFactory(), ExecutionMode.PRODUCTION, + defaultQuantilesAggParams().features[0] as ScalarFeatureSpec, ) val accumulator0 = combiner.emptyAccumulator() @@ -79,13 +80,19 @@ class QuantilesCombinerTest { fun computeMetrics_noNoise_onlyEmptyAccumulator_returnsQuantilesBetweenMinMaxValues() { val allocatedBudget = AllocatedBudget() allocatedBudget.initialize(1.1, 1e-5) + val params = + defaultQuantilesAggParams() + .copy( + features = ImmutableList.of(ScalarFeatureSpec("value", ImmutableList.of(), -10.0, 10.0)) + ) val combiner = QuantilesCombiner( sortedRanks = listOf(0.0, 0.5, 1.0), - defaultQuantilesAggParams().copy(minValue = -10.0, maxValue = 10.0), + params, allocatedBudget, ZeroNoiseFactory(), ExecutionMode.PRODUCTION, + params.features[0] as ScalarFeatureSpec, ) val quantiles = combiner.computeMetrics(combiner.emptyAccumulator()) @@ -101,13 +108,20 @@ class QuantilesCombinerTest { fun computeMetrics_smallNoise_returnsQuantilesCloseToReal(noiseKind: NoiseKind, delta: Double) { val allocatedBudget = AllocatedBudget() allocatedBudget.initialize(100.0, delta) + val params = + defaultQuantilesAggParams() + .copy( + features = ImmutableList.of(ScalarFeatureSpec("value", ImmutableList.of(), 1.0, 1000.0)), + noiseKind = noiseKind, + ) val combiner = QuantilesCombiner( sortedRanks = listOf(0.0, 0.5, 1.0), - defaultQuantilesAggParams().copy(minValue = 1.0, maxValue = 1000.0, noiseKind = noiseKind), + params, allocatedBudget, NoiseFactory(), ExecutionMode.PRODUCTION, + params.features[0] as ScalarFeatureSpec, ) val accumulator = diff --git a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/SumCombinerTest.kt b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/SumCombinerTest.kt index fe8420d0..71957737 100644 --- a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/SumCombinerTest.kt +++ b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/SumCombinerTest.kt @@ -39,10 +39,17 @@ import org.mockito.kotlin.verify class SumCombinerTest { private val SUM_AGG_PARAMS = AggregationParams( - metrics = ImmutableList.of(MetricDefinition(SUM)), + nonFeatureMetrics = ImmutableList.of(), + features = + ImmutableList.of( + ScalarFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(SUM)), + minTotalValue = -1.0, + maxTotalValue = 3.0, + ) + ), noiseKind = GAUSSIAN, - minTotalValue = -1.0, - maxTotalValue = 3.0, maxPartitionsContributed = 5, ) @@ -56,12 +63,25 @@ class SumCombinerTest { @Test fun emptyAccumulator_minIsGreaterThanZero_returnsZeroAndIgnoresContributionBounds() { + val params = + SUM_AGG_PARAMS.copy( + features = + ImmutableList.of( + ScalarFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(SUM)), + minTotalValue = 1.0, + maxTotalValue = 2.0, + ) + ) + ) val combiner = SumCombiner( - SUM_AGG_PARAMS.copy(minTotalValue = 1.0, maxTotalValue = 2.0), + params, UNUSED_ALLOCATED_BUDGET, NoiseFactory(), ExecutionMode.PRODUCTION, + params.features[0] as ScalarFeatureSpec, ) val accumulator = combiner.emptyAccumulator() @@ -71,12 +91,25 @@ class SumCombinerTest { @Test fun createAccumulator_sumsItems() { + val params = + SUM_AGG_PARAMS.copy( + features = + ImmutableList.of( + ScalarFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(SUM)), + minTotalValue = -300.0, + maxTotalValue = 300.0, + ) + ) + ) val combiner = SumCombiner( - SUM_AGG_PARAMS.copy(minTotalValue = -300.0, maxTotalValue = 300.0), + params, UNUSED_ALLOCATED_BUDGET, NoiseFactory(), ExecutionMode.TEST_MODE_WITH_CONTRIBUTION_BOUNDING, + params.features[0] as ScalarFeatureSpec, ) val accumulator = @@ -89,17 +122,27 @@ class SumCombinerTest { @Test fun createAccumulator_privacyLevelWithContributionBounding_clampsOnlyTotalSum() { + val params = + SUM_AGG_PARAMS.copy( + features = + ImmutableList.of( + ScalarFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(SUM)), + minValue = -1.0, + maxValue = 4.0, + minTotalValue = -2.0, + maxTotalValue = 300.0, + ) + ) + ) val combiner = SumCombiner( - SUM_AGG_PARAMS.copy( - minValue = -1.0, - maxValue = 4.0, - minTotalValue = -2.0, - maxTotalValue = 300.0, - ), + params, UNUSED_ALLOCATED_BUDGET, NoiseFactory(), ExecutionMode.TEST_MODE_WITH_CONTRIBUTION_BOUNDING, + params.features[0] as ScalarFeatureSpec, ) val accumulator = @@ -112,17 +155,27 @@ class SumCombinerTest { @Test fun createAccumulator_fullTestMode_doesNotClampTotalSum() { + val params = + SUM_AGG_PARAMS.copy( + features = + ImmutableList.of( + ScalarFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(SUM)), + minValue = -1.0, + maxValue = 4.0, + minTotalValue = -2.0, + maxTotalValue = 300.0, + ) + ) + ) val combiner = SumCombiner( - SUM_AGG_PARAMS.copy( - minValue = -1.0, - maxValue = 4.0, - minTotalValue = -2.0, - maxTotalValue = 300.0, - ), + params, UNUSED_ALLOCATED_BUDGET, NoiseFactory(), FULL_TEST_MODE, + params.features[0] as ScalarFeatureSpec, ) val accumulator = @@ -141,6 +194,7 @@ class SumCombinerTest { UNUSED_ALLOCATED_BUDGET, NoiseFactory(), ExecutionMode.TEST_MODE_WITH_CONTRIBUTION_BOUNDING, + SUM_AGG_PARAMS.features[0] as ScalarFeatureSpec, ) val accumulator = @@ -160,6 +214,7 @@ class SumCombinerTest { allocatedBudget, NoiseFactory(), ExecutionMode.PRODUCTION, + SUM_AGG_PARAMS.features[0] as ScalarFeatureSpec, ) val result = combiner.computeMetrics(sumAccumulator { sum = 1.0 }) @@ -171,17 +226,27 @@ class SumCombinerTest { fun computeMetrics_passesCorrectParametersToNoise() { val allocatedBudget = AllocatedBudget() allocatedBudget.initialize(1.1, 1e-3) + val params = + SUM_AGG_PARAMS.copy( + features = + ImmutableList.of( + ScalarFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(SUM)), + minTotalValue = -4.0, + maxTotalValue = 3.0, + ) + ), + noiseKind = GAUSSIAN, + maxPartitionsContributed = 10, + ) val combiner = SumCombiner( - SUM_AGG_PARAMS.copy( - noiseKind = GAUSSIAN, - maxPartitionsContributed = 10, - minTotalValue = -4.0, - maxTotalValue = 3.0, - ), + params, allocatedBudget, noiseFactoryMock, ExecutionMode.TEST_MODE_WITH_CONTRIBUTION_BOUNDING, + params.features[0] as ScalarFeatureSpec, ) val unused = combiner.computeMetrics(sumAccumulator { sum = 1.0 }) @@ -200,12 +265,25 @@ class SumCombinerTest { fun computeMetrics_withoutNoise_withMultipleContributionsIncludingEmptyAccumulator_returnsCorrectResult() { val allocatedBudget = AllocatedBudget() allocatedBudget.initialize(1.1, 1e-5) + val params = + SUM_AGG_PARAMS.copy( + features = + ImmutableList.of( + ScalarFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(SUM)), + minTotalValue = -1.0, + maxTotalValue = 3.0, + ) + ) + ) val combiner = SumCombiner( - SUM_AGG_PARAMS.copy(minTotalValue = -1.0, maxTotalValue = 3.0), + params, allocatedBudget, ZeroNoiseFactory(), ExecutionMode.TEST_MODE_WITH_CONTRIBUTION_BOUNDING, + params.features[0] as ScalarFeatureSpec, ) val accumulator0 = combiner.emptyAccumulator() @@ -228,12 +306,25 @@ class SumCombinerTest { fun computeMetrics_withoutNoiseAndEmptyAccumulatorThenMerged_returnsZeroSum() { val allocatedBudget = AllocatedBudget() allocatedBudget.initialize(1.1, 1e-5) + val params = + SUM_AGG_PARAMS.copy( + features = + ImmutableList.of( + ScalarFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(SUM)), + minTotalValue = -1.0, + maxTotalValue = 3.0, + ) + ) + ) val combiner = SumCombiner( - SUM_AGG_PARAMS.copy(minTotalValue = -1.0, maxTotalValue = 3.0), + params, allocatedBudget, ZeroNoiseFactory(), ExecutionMode.TEST_MODE_WITH_CONTRIBUTION_BOUNDING, + params.features[0] as ScalarFeatureSpec, ) val result = combiner.computeMetrics(combiner.emptyAccumulator()) diff --git a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/VarianceCombinerTest.kt b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/VarianceCombinerTest.kt index d5970145..8224fd3c 100644 --- a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/VarianceCombinerTest.kt +++ b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/VarianceCombinerTest.kt @@ -50,6 +50,7 @@ class VarianceCombinerTest { UNUSED_ALLOCATED_BUDGET, NoiseFactory(), ExecutionMode.PRODUCTION, + AGG_PARAMS.features[0] as ScalarFeatureSpec, ) val accumulator = combiner.emptyAccumulator() @@ -66,14 +67,27 @@ class VarianceCombinerTest { @Test fun createAccumulator_doesNotClampContributionsWithinBounds() { + val params = + AGG_PARAMS.copy( + features = + ImmutableList.of( + ScalarFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(VARIANCE)), + minValue = -8.0, + maxValue = 12.0, + ) + ) + ) val combiner = VarianceCombiner( - AGG_PARAMS.copy(minValue = -8.0, maxValue = 12.0), + params, UNUSED_ALLOCATED_BUDGET, UNUSED_ALLOCATED_BUDGET, UNUSED_ALLOCATED_BUDGET, NoiseFactory(), ExecutionMode.PRODUCTION, + params.features[0] as ScalarFeatureSpec, ) val accumulator = @@ -91,14 +105,27 @@ class VarianceCombinerTest { @Test fun createAccumulator_privacyLevelWithContributionBounding_clampssingleValueContributions() { + val params = + AGG_PARAMS.copy( + features = + ImmutableList.of( + ScalarFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(VARIANCE)), + minValue = -10.0, + maxValue = 10.0, + ) + ) + ) val combiner = VarianceCombiner( - AGG_PARAMS.copy(minValue = -10.0, maxValue = 10.0), + params, UNUSED_ALLOCATED_BUDGET, UNUSED_ALLOCATED_BUDGET, UNUSED_ALLOCATED_BUDGET, NoiseFactory(), ExecutionMode.PRODUCTION, + params.features[0] as ScalarFeatureSpec, ) val accumulator = @@ -119,14 +146,27 @@ class VarianceCombinerTest { @Test fun createAccumulator_fullTestMode_doesNotClampSingleValueContributions() { + val params = + AGG_PARAMS.copy( + features = + ImmutableList.of( + ScalarFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(VARIANCE)), + minValue = -10.0, + maxValue = 10.0, + ) + ) + ) val combiner = VarianceCombiner( - AGG_PARAMS.copy(minValue = -10.0, maxValue = 10.0), + params, UNUSED_ALLOCATED_BUDGET, UNUSED_ALLOCATED_BUDGET, UNUSED_ALLOCATED_BUDGET, NoiseFactory(), FULL_TEST_MODE, + params.features[0] as ScalarFeatureSpec, ) val accumulator = @@ -146,14 +186,27 @@ class VarianceCombinerTest { @Test fun createAccumulator_normalizesSumAndSumOfSquares() { + val params = + AGG_PARAMS.copy( + features = + ImmutableList.of( + ScalarFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(VARIANCE)), + minValue = 5.0, + maxValue = 10.0, + ) + ) + ) val combiner = VarianceCombiner( - AGG_PARAMS.copy(minValue = 5.0, maxValue = 10.0), + params, UNUSED_ALLOCATED_BUDGET, UNUSED_ALLOCATED_BUDGET, UNUSED_ALLOCATED_BUDGET, NoiseFactory(), ExecutionMode.PRODUCTION, + params.features[0] as ScalarFeatureSpec, ) val accumulator = @@ -171,14 +224,27 @@ class VarianceCombinerTest { @Test fun createAccumulator_normalizationAndClamping() { + val params = + AGG_PARAMS.copy( + features = + ImmutableList.of( + ScalarFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(VARIANCE)), + minValue = 5.0, + maxValue = 10.0, + ) + ) + ) val combiner = VarianceCombiner( - AGG_PARAMS.copy(minValue = 5.0, maxValue = 10.0), + params, UNUSED_ALLOCATED_BUDGET, UNUSED_ALLOCATED_BUDGET, UNUSED_ALLOCATED_BUDGET, NoiseFactory(), ExecutionMode.PRODUCTION, + params.features[0] as ScalarFeatureSpec, ) val accumulator = @@ -198,14 +264,27 @@ class VarianceCombinerTest { @Test fun createAccumulator_aggregatesMultipleElements() { + val params = + AGG_PARAMS.copy( + features = + ImmutableList.of( + ScalarFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(VARIANCE)), + minValue = 4.0, + maxValue = 10.0, + ) + ) + ) val combiner = VarianceCombiner( - AGG_PARAMS.copy(minValue = 4.0), + params, UNUSED_ALLOCATED_BUDGET, UNUSED_ALLOCATED_BUDGET, UNUSED_ALLOCATED_BUDGET, NoiseFactory(), ExecutionMode.PRODUCTION, + params.features[0] as ScalarFeatureSpec, ) // Create list with one value that is clamped to min value. @@ -234,6 +313,7 @@ class VarianceCombinerTest { UNUSED_ALLOCATED_BUDGET, NoiseFactory(), ExecutionMode.PRODUCTION, + AGG_PARAMS.features[0] as ScalarFeatureSpec, ) val accumulator = @@ -271,20 +351,30 @@ class VarianceCombinerTest { val noise: Noise = mock() val noiseFactory: (NoiseKind) -> Noise = { _ -> noise } + val params = + AGG_PARAMS.copy( + nonFeatureMetrics = ImmutableList.of(), + features = + ImmutableList.of( + ScalarFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(MEAN), MetricDefinition(VARIANCE)), + minValue = 4.0, + maxValue = 10.0, + ) + ), + maxPartitionsContributed = 5, + maxContributionsPerPartition = 7, + ) val combiner = VarianceCombiner( - AGG_PARAMS.copy( - metrics = ImmutableList.of(MetricDefinition(MEAN), MetricDefinition(VARIANCE)), - maxPartitionsContributed = 5, - maxContributionsPerPartition = 7, - minValue = 4.0, - maxValue = 10.0, - ), + params, countBudget, sumBudget, sumSquaresBudget, noiseFactory, ExecutionMode.PRODUCTION, + params.features[0] as ScalarFeatureSpec, ) val accumulator = varianceAccumulator { @@ -334,27 +424,36 @@ class VarianceCombinerTest { val sumSquaresBudget = AllocatedBudget() sumSquaresBudget.initialize(10000.0, 0.0) + val params = + AGG_PARAMS.copy( + nonFeatureMetrics = ImmutableList.of(MetricDefinition(COUNT)), + features = + ImmutableList.of( + ScalarFeatureSpec( + featureId = "value", + metrics = + ImmutableList.of( + MetricDefinition(VARIANCE), + MetricDefinition(MEAN), + MetricDefinition(SUM), + ), + minValue = 4.0, + maxValue = 12.0, + ) + ), + maxPartitionsContributed = 5, + maxContributionsPerPartition = 7, + noiseKind = NoiseKind.LAPLACE, + ) val combiner = VarianceCombiner( - AGG_PARAMS.copy( - metrics = - ImmutableList.of( - MetricDefinition(VARIANCE), - MetricDefinition(MEAN), - MetricDefinition(SUM), - MetricDefinition(COUNT), - ), - maxPartitionsContributed = 5, - maxContributionsPerPartition = 7, - minValue = 4.0, - maxValue = 12.0, - noiseKind = NoiseKind.LAPLACE, - ), + params, countBudget, sumBudget, sumSquaresBudget, NoiseFactory(), ExecutionMode.PRODUCTION, + params.features[0] as ScalarFeatureSpec, ) val accumulator = varianceAccumulator { @@ -375,43 +474,44 @@ class VarianceCombinerTest { } enum class ReturnedMetricsTestCase( - val requestedMetrics: ImmutableList, + val nonFeatureMetrics: ImmutableList, + val featureMetrics: ImmutableList, val countExpected: Boolean, val sumExpected: Boolean, val meanExpected: Boolean, ) { NO_SUM_NO_COUNT_NO_MEAN( - requestedMetrics = ImmutableList.of(MetricDefinition(VARIANCE)), + nonFeatureMetrics = ImmutableList.of(), + featureMetrics = ImmutableList.of(MetricDefinition(VARIANCE)), countExpected = false, sumExpected = false, meanExpected = false, ), ONLY_SUM( - requestedMetrics = ImmutableList.of(MetricDefinition(VARIANCE), MetricDefinition(SUM)), + nonFeatureMetrics = ImmutableList.of(), + featureMetrics = ImmutableList.of(MetricDefinition(VARIANCE), MetricDefinition(SUM)), countExpected = false, sumExpected = true, meanExpected = false, ), ONLY_COUNT( - requestedMetrics = ImmutableList.of(MetricDefinition(VARIANCE), MetricDefinition(COUNT)), + nonFeatureMetrics = ImmutableList.of(MetricDefinition(COUNT)), + featureMetrics = ImmutableList.of(MetricDefinition(VARIANCE)), countExpected = true, sumExpected = false, meanExpected = false, ), ONLY_MEAN( - requestedMetrics = ImmutableList.of(MetricDefinition(VARIANCE), MetricDefinition(MEAN)), + nonFeatureMetrics = ImmutableList.of(), + featureMetrics = ImmutableList.of(MetricDefinition(VARIANCE), MetricDefinition(MEAN)), countExpected = false, sumExpected = false, meanExpected = true, ), COUNT_AND_SUM_AND_MEAN( - requestedMetrics = - ImmutableList.of( - MetricDefinition(VARIANCE), - MetricDefinition(MEAN), - MetricDefinition(SUM), - MetricDefinition(COUNT), - ), + nonFeatureMetrics = ImmutableList.of(MetricDefinition(COUNT)), + featureMetrics = + ImmutableList.of(MetricDefinition(VARIANCE), MetricDefinition(MEAN), MetricDefinition(SUM)), countExpected = true, sumExpected = true, meanExpected = true, @@ -422,17 +522,29 @@ class VarianceCombinerTest { fun aggregate_computeMetrics_checkWhichMetricReturned( @TestParameter testCase: ReturnedMetricsTestCase ) { + val features: ImmutableList = + ImmutableList.of( + ScalarFeatureSpec( + featureId = "value", + metrics = testCase.featureMetrics, + minValue = -10.0, + maxValue = 10.0, + ) + ) + val params = + AGG_PARAMS.copy(nonFeatureMetrics = testCase.nonFeatureMetrics, features = features) val combiner = VarianceCombiner( - AGG_PARAMS.copy(metrics = testCase.requestedMetrics), + params, UNUSED_ALLOCATED_BUDGET, UNUSED_ALLOCATED_BUDGET, UNUSED_ALLOCATED_BUDGET, NoiseFactory(), ExecutionMode.PRODUCTION, + params.features[0] as ScalarFeatureSpec, ) - val metrics = + val result = combiner.computeMetrics( varianceAccumulator { count = 10 @@ -441,38 +553,48 @@ class VarianceCombinerTest { } ) if (testCase.countExpected) { - assertThat(metrics.count).isNotNull() + assertThat(result.count).isNotNull() } else { - assertThat(metrics.count).isNull() + assertThat(result.count).isNull() } if (testCase.sumExpected) { - assertThat(metrics.sum).isNotNull() + assertThat(result.sum).isNotNull() } else { - assertThat(metrics.sum).isNull() + assertThat(result.sum).isNull() } if (testCase.meanExpected) { - assertThat(metrics.mean).isNotNull() + assertThat(result.mean).isNotNull() } else { - assertThat(metrics.mean).isNull() + assertThat(result.mean).isNull() } } @Test fun computeMetrics_withoutNoise_withMultipleContributionsIncludingEmptyAccumulator_returnsCorrectResult() { + val params = + AGG_PARAMS.copy( + nonFeatureMetrics = ImmutableList.of(), + features = + ImmutableList.of( + ScalarFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(VARIANCE)), + minValue = -10.0, + maxValue = 10.0, + ) + ), + ) val combiner = VarianceCombiner( - AGG_PARAMS.copy( - ImmutableList.of(MetricDefinition(VARIANCE)), - minValue = -10.0, - maxValue = 10.0, - ), + params, UNUSED_ALLOCATED_BUDGET, UNUSED_ALLOCATED_BUDGET, UNUSED_ALLOCATED_BUDGET, ZeroNoiseFactory(), ExecutionMode.PRODUCTION, + params.features[0] as ScalarFeatureSpec, ) val accumulator0 = combiner.emptyAccumulator() @@ -495,23 +617,33 @@ class VarianceCombinerTest { @Test fun computeMetrics_withoutNoise_onlyEmptyAccumulator_returnsZeroCountAndNaNForCountMeanAndVariance() { - val combiner = - VarianceCombiner( - AGG_PARAMS.copy( + val params = + AGG_PARAMS.copy( + nonFeatureMetrics = ImmutableList.of(MetricDefinition(COUNT)), + features = ImmutableList.of( - MetricDefinition(VARIANCE), - MetricDefinition(MEAN), - MetricDefinition(SUM), - MetricDefinition(COUNT), + ScalarFeatureSpec( + featureId = "value", + metrics = + ImmutableList.of( + MetricDefinition(VARIANCE), + MetricDefinition(MEAN), + MetricDefinition(SUM), + ), + minValue = 4.0, + maxValue = 10.0, + ) ), - minValue = 4.0, - maxValue = 10.0, - ), + ) + val combiner = + VarianceCombiner( + params, UNUSED_ALLOCATED_BUDGET, UNUSED_ALLOCATED_BUDGET, UNUSED_ALLOCATED_BUDGET, ZeroNoiseFactory(), ExecutionMode.PRODUCTION, + params.features[0] as ScalarFeatureSpec, ) val result = combiner.computeMetrics(combiner.emptyAccumulator()) @@ -530,12 +662,19 @@ class VarianceCombinerTest { companion object { private val AGG_PARAMS = AggregationParams( - metrics = ImmutableList.of(MetricDefinition(MEAN), MetricDefinition(VARIANCE)), + nonFeatureMetrics = ImmutableList.of(), + features = + ImmutableList.of( + ScalarFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(MEAN), MetricDefinition(VARIANCE)), + minValue = -10.0, + maxValue = 10.0, + ) + ), noiseKind = NoiseKind.GAUSSIAN, maxPartitionsContributed = 3, maxContributionsPerPartition = 5, - minValue = -10.0, - maxValue = 10.0, ) private val UNUSED_ALLOCATED_BUDGET = AllocatedBudget() diff --git a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/VectorSumCombinerTest.kt b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/VectorSumCombinerTest.kt index 2bc9ee42..4df3bfe0 100644 --- a/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/VectorSumCombinerTest.kt +++ b/pipelinedp4j/tests/com/google/privacy/differentialprivacy/pipelinedp4j/core/VectorSumCombinerTest.kt @@ -44,11 +44,18 @@ import org.mockito.kotlin.verifyNoMoreInteractions class VectorSumCombinerTest { private val VECTOR_SUM_AGG_PARAMS = AggregationParams( - metrics = ImmutableList.of(MetricDefinition(VECTOR_SUM)), + nonFeatureMetrics = ImmutableList.of(), + features = + ImmutableList.of( + VectorFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(VECTOR_SUM)), + vectorSize = 3, + normKind = NormKind.L_INF, + vectorMaxTotalNorm = 3.0, + ) + ), noiseKind = GAUSSIAN, - vectorNormKind = NormKind.L_INF, - vectorMaxTotalNorm = 3.0, - vectorSize = 3, maxPartitionsContributed = 5, ) @@ -60,12 +67,26 @@ class VectorSumCombinerTest { @Test fun emptyAccumulator_returnsZeroVector() { + val params = + VECTOR_SUM_AGG_PARAMS.copy( + features = + ImmutableList.of( + VectorFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(VECTOR_SUM)), + vectorSize = 3, + normKind = NormKind.L_INF, + vectorMaxTotalNorm = 3.0, + ) + ) + ) val combiner = VectorSumCombiner( - VECTOR_SUM_AGG_PARAMS.copy(vectorSize = 3), + params, UNUSED_ALLOCATED_BUDGET, NoiseFactory(), ExecutionMode.PRODUCTION, + params.features[0] as VectorFeatureSpec, ) val accumulator = combiner.emptyAccumulator() @@ -76,16 +97,26 @@ class VectorSumCombinerTest { @Test fun createAccumulator_sumsVectors() { + val params = + VECTOR_SUM_AGG_PARAMS.copy( + features = + ImmutableList.of( + VectorFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(VECTOR_SUM)), + vectorSize = 3, + normKind = NormKind.L1, + vectorMaxTotalNorm = 300.0, + ) + ) + ) val combiner = VectorSumCombiner( - VECTOR_SUM_AGG_PARAMS.copy( - vectorNormKind = NormKind.L1, - vectorMaxTotalNorm = 300.0, - vectorSize = 3, - ), + params, UNUSED_ALLOCATED_BUDGET, NoiseFactory(), ExecutionMode.PRODUCTION, + params.features[0] as VectorFeatureSpec, ) val accumulator = @@ -107,17 +138,27 @@ class VectorSumCombinerTest { @Test fun createAccumulator_perPartitionContributionBoundingEnabledLInfNorm_clampsOnlyTotalVectorSum() { + val params = + VECTOR_SUM_AGG_PARAMS.copy( + features = + ImmutableList.of( + VectorFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(VECTOR_SUM)), + vectorSize = 3, + normKind = NormKind.L_INF, + vectorMaxTotalNorm = 30.0, + ) + ), + contributionBoundingLevel = PARTITION_LEVEL, + ) val combiner = VectorSumCombiner( - VECTOR_SUM_AGG_PARAMS.copy( - vectorNormKind = NormKind.L_INF, - vectorMaxTotalNorm = 30.0, - vectorSize = 3, - contributionBoundingLevel = PARTITION_LEVEL, - ), + params, UNUSED_ALLOCATED_BUDGET, NoiseFactory(), ExecutionMode.PRODUCTION, + params.features[0] as VectorFeatureSpec, ) val accumulator = @@ -139,16 +180,26 @@ class VectorSumCombinerTest { @Test fun createAccumulator_perPartitionContributionBoundingEnabledL1Norm_clampsOnlyTotalVectorSum() { + val params = + VECTOR_SUM_AGG_PARAMS.copy( + features = + ImmutableList.of( + VectorFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(VECTOR_SUM)), + vectorSize = 2, + normKind = NormKind.L1, + vectorMaxTotalNorm = 10.0, + ) + ) + ) val combiner = VectorSumCombiner( - VECTOR_SUM_AGG_PARAMS.copy( - vectorNormKind = NormKind.L1, - vectorMaxTotalNorm = 10.0, - vectorSize = 2, - ), + params, UNUSED_ALLOCATED_BUDGET, NoiseFactory(), ExecutionMode.PRODUCTION, + params.features[0] as VectorFeatureSpec, ) val accumulator = @@ -169,16 +220,26 @@ class VectorSumCombinerTest { @Test fun createAccumulator_perPartitionContributionBoundingEnabledL2Norm_clampsOnlyTotalVectorSum() { + val params = + VECTOR_SUM_AGG_PARAMS.copy( + features = + ImmutableList.of( + VectorFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(VECTOR_SUM)), + vectorSize = 2, + normKind = NormKind.L2, + vectorMaxTotalNorm = 6.5, + ) + ) + ) val combiner = VectorSumCombiner( - VECTOR_SUM_AGG_PARAMS.copy( - vectorNormKind = NormKind.L2, - vectorMaxTotalNorm = 6.5, - vectorSize = 2, - ), + params, UNUSED_ALLOCATED_BUDGET, NoiseFactory(), ExecutionMode.PRODUCTION, + params.features[0] as VectorFeatureSpec, ) val accumulator = @@ -198,16 +259,26 @@ class VectorSumCombinerTest { @Test fun createAccumulator_fullTestMode_doesNotClampTotalSum() { + val params = + VECTOR_SUM_AGG_PARAMS.copy( + features = + ImmutableList.of( + VectorFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(VECTOR_SUM)), + vectorSize = 3, + normKind = NormKind.L_INF, + vectorMaxTotalNorm = 30.0, + ) + ) + ) val combiner = VectorSumCombiner( - VECTOR_SUM_AGG_PARAMS.copy( - vectorNormKind = NormKind.L_INF, - vectorMaxTotalNorm = 30.0, - vectorSize = 3, - ), + params, UNUSED_ALLOCATED_BUDGET, NoiseFactory(), FULL_TEST_MODE, + params.features[0] as VectorFeatureSpec, ) val accumulator = @@ -233,6 +304,7 @@ class VectorSumCombinerTest { UNUSED_ALLOCATED_BUDGET, NoiseFactory(), ExecutionMode.PRODUCTION, + VECTOR_SUM_AGG_PARAMS.features[0] as VectorFeatureSpec, ) val accumulator = @@ -250,12 +322,14 @@ class VectorSumCombinerTest { fun computeMetrics_addsNoise(noiseKind: NoiseKind, delta: Double) { val allocatedBudget = AllocatedBudget() allocatedBudget.initialize(1.1, delta) + val params = VECTOR_SUM_AGG_PARAMS.copy(noiseKind = noiseKind) val combiner = VectorSumCombiner( - VECTOR_SUM_AGG_PARAMS.copy(noiseKind = noiseKind), + params, allocatedBudget, NoiseFactory(), ExecutionMode.PRODUCTION, + params.features[0] as VectorFeatureSpec, ) val result = @@ -280,17 +354,27 @@ class VectorSumCombinerTest { val noiseFactoryMock: (NoiseKind) -> Noise = { _ -> noiseMock } val allocatedBudget = AllocatedBudget() allocatedBudget.initialize(1.1, 1e-3) + val params = + VECTOR_SUM_AGG_PARAMS.copy( + features = + ImmutableList.of( + VectorFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(VECTOR_SUM)), + vectorSize = 3, + normKind = normKind, + vectorMaxTotalNorm = 30.0, + ) + ), + maxPartitionsContributed = 10, + ) val combiner = VectorSumCombiner( - VECTOR_SUM_AGG_PARAMS.copy( - vectorNormKind = normKind, - vectorMaxTotalNorm = 30.0, - vectorSize = 3, - maxPartitionsContributed = 10, - ), + params, allocatedBudget, noiseFactoryMock, ExecutionMode.PRODUCTION, + params.features[0] as VectorFeatureSpec, ) val unused = @@ -336,17 +420,27 @@ class VectorSumCombinerTest { val noiseFactoryMock: (NoiseKind) -> Noise = { _ -> noiseMock } val allocatedBudget = AllocatedBudget() allocatedBudget.initialize(1.1, 1e-3) + val params = + VECTOR_SUM_AGG_PARAMS.copy( + features = + ImmutableList.of( + VectorFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(VECTOR_SUM)), + vectorSize = 4, + normKind = normKind, + vectorMaxTotalNorm = 30.0, + ) + ), + maxPartitionsContributed = 100, + ) val combiner = VectorSumCombiner( - VECTOR_SUM_AGG_PARAMS.copy( - vectorNormKind = normKind, - vectorMaxTotalNorm = 30.0, - vectorSize = 4, - maxPartitionsContributed = 100, - ), + params, allocatedBudget, noiseFactoryMock, ExecutionMode.PRODUCTION, + params.features[0] as VectorFeatureSpec, ) val unused = @@ -389,17 +483,27 @@ class VectorSumCombinerTest { fun computeMetrics_withoutNoise_withMultipleContributionsIncludingEmptyAccumulator_returnsCorrectResult() { val allocatedBudget = AllocatedBudget() allocatedBudget.initialize(1.1, 1e-5) + val params = + VECTOR_SUM_AGG_PARAMS.copy( + features = + ImmutableList.of( + VectorFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(VECTOR_SUM)), + vectorSize = 3, + normKind = NormKind.L2, + vectorMaxTotalNorm = 30.0, + ) + ), + maxPartitionsContributed = 10, + ) val combiner = VectorSumCombiner( - VECTOR_SUM_AGG_PARAMS.copy( - vectorNormKind = NormKind.L2, - vectorMaxTotalNorm = 30.0, - vectorSize = 3, - maxPartitionsContributed = 10, - ), + params, allocatedBudget, ZeroNoiseFactory(), ExecutionMode.PRODUCTION, + params.features[0] as VectorFeatureSpec, ) val accumulator0 = combiner.emptyAccumulator() @@ -434,12 +538,26 @@ class VectorSumCombinerTest { fun computeMetrics_withoutNoiseAndEmptyAccumulator_returnsZeroVectorSum() { val allocatedBudget = AllocatedBudget() allocatedBudget.initialize(1.1, 1e-5) + val params = + VECTOR_SUM_AGG_PARAMS.copy( + features = + ImmutableList.of( + VectorFeatureSpec( + featureId = "value", + metrics = ImmutableList.of(MetricDefinition(VECTOR_SUM)), + vectorSize = 3, + normKind = NormKind.L_INF, + vectorMaxTotalNorm = 3.0, + ) + ) + ) val combiner = VectorSumCombiner( - VECTOR_SUM_AGG_PARAMS.copy(vectorSize = 3), + params, allocatedBudget, ZeroNoiseFactory(), ExecutionMode.PRODUCTION, + params.features[0] as VectorFeatureSpec, ) val result = combiner.computeMetrics(combiner.emptyAccumulator())