Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ private ExponentialHistogram asCompressedHistogram(ExponentialHistogram histogra
CompressedExponentialHistogram.writeHistogramBytes(histoBytes, histogram.scale(), negativeBuckets, positiveBuckets);
CompressedExponentialHistogram result = new CompressedExponentialHistogram();
BytesRef data = histoBytes.bytes().toBytesRef();
result.reset(histogram.zeroBucket().zeroThreshold(), totalCount, histogram.sum(), data);
result.reset(histogram.zeroBucket().zeroThreshold(), totalCount, histogram.sum(), histogram.min(), data);
return result;
} catch (IOException e) {
throw new RuntimeException(e);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,11 @@ public double sum() {
return 0;
}

@Override
public double min() {
    // An empty histogram represents no values, so the minimum is undefined; NaN signals "no value".
    return Double.NaN;
}

@Override
public long ramBytesUsed() {
return 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,13 @@ public interface ExponentialHistogram extends Accountable {
*/
double sum();

/**
 * Returns the minimum of all values represented by this histogram.
 *
 * @return the minimum, or {@code Double.NaN} for empty histograms
 */
double min();

/**
* Represents a bucket range of an {@link ExponentialHistogram}, either the positive or the negative range.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,9 @@ private void mergeValuesToHistogram() {
}

valueBuffer.reset();
valueBuffer.setSum(rawValuesSum());
Aggregates aggregates = rawValuesAggregates();
valueBuffer.setSum(aggregates.sum());
valueBuffer.setMin(aggregates.min());
int scale = valueBuffer.scale();

// Buckets must be provided with their indices in ascending order.
Expand Down Expand Up @@ -162,12 +164,17 @@ private void mergeValuesToHistogram() {
valueCount = 0;
}

private double rawValuesSum() {
private Aggregates rawValuesAggregates() {
if (valueCount == 0) {
return new Aggregates(0, Double.NaN);
}
double sum = 0;
double min = Double.MAX_VALUE;
for (int i = 0; i < valueCount; i++) {
sum += rawValueBuffer[i];
min = Math.min(min, rawValueBuffer[i]);
}
return sum;
return new Aggregates(sum, min);
}

private static long estimateBaseSize(int numBuckets) {
Expand All @@ -190,4 +197,6 @@ public void close() {
circuitBreaker.adjustBreaker(-estimateBaseSize(rawValueBuffer.length));
}
}

// Immutable carrier for aggregate statistics (sum and minimum) computed over the raw value buffer.
private record Aggregates(double sum, double min) {}
}
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ public void add(ExponentialHistogram toAdd) {
}
buffer.setZeroBucket(zeroBucket);
buffer.setSum(a.sum() + b.sum());

buffer.setMin(nanAwareMin(a.min(), b.min()));
// We attempt to bring everything to the scale of A.
// This might involve increasing the scale for B, which would increase its indices.
// We need to ensure that we do not exceed MAX_INDEX / MIN_INDEX in this case.
Expand Down Expand Up @@ -231,4 +231,14 @@ private static int putBuckets(
return overflowCount;
}

/**
 * Computes the minimum of two values, where {@code NaN} means "no value present"
 * and falls back to the other operand instead of poisoning the result as
 * {@code Math.min} would.
 */
private static double nanAwareMin(double a, double b) {
    if (Double.isNaN(a)) {
        return b;
    }
    return Double.isNaN(b) ? a : Math.min(a, b);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@

package org.elasticsearch.exponentialhistogram;

import java.util.OptionalDouble;
import java.util.OptionalLong;

public class ExponentialHistogramUtils {

/**
Expand Down Expand Up @@ -59,4 +62,44 @@ public static double estimateSum(BucketIterator negativeBuckets, BucketIterator
}
return sum;
}

/**
 * Provides a lower-bound estimate for the minimum value of the histogram based on its populated buckets.
 * The returned value is guaranteed to be less than or equal to the exact minimum of the histogram values.
 * If the histogram is empty, an empty Optional is returned.
 *
 * Note that this method can return positive or negative infinity if the histogram
 * bucket boundaries are not representable in a double.
 *
 * @param zeroBucket the zero bucket of the histogram
 * @param negativeBuckets the negative buckets of the histogram
 * @param positiveBuckets the positive buckets of the histogram
 * @return the estimated minimum
 */
public static OptionalDouble estimateMin(
    ZeroBucket zeroBucket,
    ExponentialHistogram.Buckets negativeBuckets,
    ExponentialHistogram.Buckets positiveBuckets
) {
    int scale = negativeBuckets.iterator().scale();
    assert scale == positiveBuckets.iterator().scale();

    // The most negative value lives in the negative bucket with the largest index;
    // its magnitude is bounded by that bucket's upper boundary.
    OptionalLong largestNegativeIndex = negativeBuckets.maxBucketIndex();
    if (largestNegativeIndex.isPresent()) {
        double magnitude = ExponentialScaleUtils.getUpperBucketBoundary(largestNegativeIndex.getAsLong(), scale);
        return OptionalDouble.of(-magnitude);
    }

    // No negative buckets: anything in the zero bucket is smaller than any positive value.
    if (zeroBucket.count() > 0) {
        double threshold = zeroBucket.zeroThreshold();
        // Return +0.0 rather than -0.0 when the threshold is exactly zero.
        return OptionalDouble.of(threshold == 0.0 ? 0.0 : -threshold);
    }

    // Only positive values remain: the minimum is bounded by the lower edge of the first populated bucket.
    BucketIterator posIt = positiveBuckets.iterator();
    if (posIt.hasNext()) {
        return OptionalDouble.of(ExponentialScaleUtils.getLowerBucketBoundary(posIt.peekIndex(), scale));
    }
    return OptionalDouble.empty();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ public class ExponentialHistogramXContent {

public static final String SCALE_FIELD = "scale";
public static final String SUM_FIELD = "sum";
public static final String MIN_FIELD = "min";
public static final String ZERO_FIELD = "zero";
public static final String ZERO_COUNT_FIELD = "count";
public static final String ZERO_THRESHOLD_FIELD = "threshold";
Expand All @@ -51,6 +52,9 @@ public static void serialize(XContentBuilder builder, ExponentialHistogram histo

builder.field(SCALE_FIELD, histogram.scale());
builder.field(SUM_FIELD, histogram.sum());
if (Double.isNaN(histogram.min()) == false) {
builder.field(MIN_FIELD, histogram.min());
}
double zeroThreshold = histogram.zeroBucket().zeroThreshold();
long zeroCount = histogram.zeroBucket().count();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ final class FixedCapacityExponentialHistogram implements ReleasableExponentialHi
private final Buckets positiveBuckets = new Buckets(true);

private double sum;
private double min;

private final ExponentialHistogramCircuitBreaker circuitBreaker;
private boolean closed = false;
Expand Down Expand Up @@ -81,6 +82,7 @@ private FixedCapacityExponentialHistogram(int bucketCapacity, ExponentialHistogr
*/
void reset() {
    sum = 0;
    // NaN marks "no values yet", matching the min() contract for empty histograms.
    min = Double.NaN;
    setZeroBucket(ZeroBucket.minimalEmpty());
    resetBuckets(MAX_SCALE);
}
Expand Down Expand Up @@ -122,6 +124,15 @@ void setSum(double sum) {
this.sum = sum;
}

@Override
public double min() {
    // NaN until a minimum has been set (i.e. while the histogram is empty).
    return min;
}

// Sets the minimum of all values represented by this histogram; use NaN for an empty histogram.
void setMin(double min) {
    this.min = min;
}

/**
* Attempts to add a bucket to the positive or negative range of this histogram.
* <br>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.DoubleStream;
import java.util.stream.IntStream;

import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_INDEX;
Expand Down Expand Up @@ -106,19 +107,24 @@ public void testEmptyZeroBucketIgnored() {
assertThat(posBuckets.hasNext(), equalTo(false));
}

public void testSumCorrectness() {
public void testAggregatesCorrectness() {
double[] firstValues = randomDoubles(100).map(val -> val * 2 - 1).toArray();
double[] secondValues = randomDoubles(50).map(val -> val * 2 - 1).toArray();
double correctSum = Arrays.stream(firstValues).sum() + Arrays.stream(secondValues).sum();
double correctMin = DoubleStream.concat(Arrays.stream(firstValues), Arrays.stream(secondValues)).min().getAsDouble();
try (
// Merge some empty histograms too to test that code path
ReleasableExponentialHistogram merged = ExponentialHistogram.merge(
2,
breaker(),
ExponentialHistogram.empty(),
createAutoReleasedHistogram(10, firstValues),
createAutoReleasedHistogram(20, secondValues)
createAutoReleasedHistogram(20, secondValues),
ExponentialHistogram.empty()
)
) {
assertThat(merged.sum(), closeTo(correctSum, 0.000001));
assertThat(merged.min(), equalTo(correctMin));
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@

package org.elasticsearch.exponentialhistogram;

import java.util.OptionalDouble;

import static org.hamcrest.Matchers.closeTo;
import static org.hamcrest.Matchers.equalTo;

Expand Down Expand Up @@ -57,7 +59,7 @@ public void testRandomDataSumEstimation() {
}
}

public void testInfinityHandling() {
public void testSumInfinityHandling() {
FixedCapacityExponentialHistogram morePositiveValues = createAutoReleasedHistogram(100);
morePositiveValues.resetBuckets(0);
morePositiveValues.tryAddBucket(1999, 1, false);
Expand All @@ -83,4 +85,85 @@ public void testInfinityHandling() {
);
assertThat(sum, equalTo(Double.NEGATIVE_INFINITY));
}

public void testMinimumEstimation() {
    // Randomized property test: the estimated minimum must stay within the
    // relative error bound implied by the histogram's scale.
    for (int i = 0; i < 100; i++) {
        int positiveValueCount = randomBoolean() ? 0 : randomIntBetween(10, 10_000);
        int negativeValueCount = randomBoolean() ? 0 : randomIntBetween(10, 10_000);
        int zeroValueCount = randomBoolean() ? 0 : randomIntBetween(10, 100);
        int bucketCount = randomIntBetween(2, 500);

        double correctMin = Double.MAX_VALUE;
        double zeroThreshold = Double.MAX_VALUE;
        double[] values = new double[positiveValueCount + negativeValueCount];
        for (int j = 0; j < values.length; j++) {
            // Magnitudes span several orders of magnitude (up to 10^9).
            double absValue = Math.pow(10, randomIntBetween(1, 9)) * randomDouble();
            if (j < positiveValueCount) {
                values[j] = absValue;
            } else {
                values[j] = -absValue;
            }
            // Keep the zero threshold strictly below every generated magnitude,
            // so none of the generated values falls into the zero bucket.
            zeroThreshold = Math.min(zeroThreshold, absValue / 2);
            correctMin = Math.min(correctMin, values[j]);
        }
        if (zeroValueCount > 0) {
            // Values in the zero bucket can be as small as -zeroThreshold.
            correctMin = Math.min(correctMin, -zeroThreshold);
        }

        ExponentialHistogram histo = createAutoReleasedHistogram(bucketCount, values);

        OptionalDouble estimatedMin = ExponentialHistogramUtils.estimateMin(
            new ZeroBucket(zeroThreshold, zeroValueCount),
            histo.negativeBuckets(),
            histo.positiveBuckets()
        );
        if (correctMin == Double.MAX_VALUE) {
            // No values at all: the estimate must be absent.
            assertThat(estimatedMin.isPresent(), equalTo(false));
        } else {
            assertThat(estimatedMin.isPresent(), equalTo(true));
            // If the histogram does not contain mixed sign values, we have a guaranteed relative error bound of 2^(2^-scale) - 1
            double histogramBase = Math.pow(2, Math.pow(2, -histo.scale()));
            double allowedError = Math.abs(correctMin * (histogramBase - 1));
            assertThat(estimatedMin.getAsDouble(), closeTo(correctMin, allowedError));
        }
    }
}

public void testMinimumEstimationPositiveInfinityHandling() {
    // A single positive bucket whose lower boundary overflows a double;
    // the estimate is expected to saturate to +Infinity rather than fail.
    FixedCapacityExponentialHistogram histogram = createAutoReleasedHistogram(100);
    histogram.resetBuckets(0);
    histogram.tryAddBucket(2000, 1, true);

    OptionalDouble result = ExponentialHistogramUtils.estimateMin(
        ZeroBucket.minimalEmpty(),
        histogram.negativeBuckets(),
        histogram.positiveBuckets()
    );
    assertThat(result.isPresent(), equalTo(true));
    assertThat(result.getAsDouble(), equalTo(Double.POSITIVE_INFINITY));
}

public void testMinimumEstimationNegativeInfinityHandling() {
    // A single negative bucket whose upper boundary overflows a double;
    // the estimate is expected to saturate to -Infinity rather than fail.
    FixedCapacityExponentialHistogram histogram = createAutoReleasedHistogram(100);
    histogram.resetBuckets(0);
    histogram.tryAddBucket(2000, 1, false);

    OptionalDouble result = ExponentialHistogramUtils.estimateMin(
        ZeroBucket.minimalEmpty(),
        histogram.negativeBuckets(),
        histogram.positiveBuckets()
    );
    assertThat(result.isPresent(), equalTo(true));
    assertThat(result.getAsDouble(), equalTo(Double.NEGATIVE_INFINITY));
}

public void testMinimumEstimationSanitizedNegativeZero() {
    // A populated zero bucket with a threshold of exactly zero must yield +0.0, not -0.0.
    OptionalDouble result = ExponentialHistogramUtils.estimateMin(
        ZeroBucket.minimalWithCount(42),
        ExponentialHistogram.empty().negativeBuckets(),
        ExponentialHistogram.empty().positiveBuckets()
    );
    assertThat(result.isPresent(), equalTo(true));
    assertThat(result.getAsDouble(), equalTo(0.0));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ public void testFullHistogram() {
histo.setZeroBucket(new ZeroBucket(0.1234, 42));
histo.resetBuckets(7);
histo.setSum(1234.56);
histo.setMin(-321.123);
histo.tryAddBucket(-10, 15, false);
histo.tryAddBucket(10, 5, false);
histo.tryAddBucket(-11, 10, true);
Expand All @@ -51,6 +52,7 @@ public void testFullHistogram() {
"{"
+ "\"scale\":7,"
+ "\"sum\":1234.56,"
+ "\"min\":-321.123,"
+ "\"zero\":{\"count\":42,\"threshold\":0.1234},"
+ "\"positive\":{\"indices\":[-11,11],\"counts\":[10,20]},"
+ "\"negative\":{\"indices\":[-10,10],\"counts\":[15,5]}"
Expand All @@ -72,25 +74,28 @@ public void testOnlyZeroCount() {
histo.setZeroBucket(new ZeroBucket(0.0, 7));
histo.resetBuckets(2);
histo.setSum(1.1);
assertThat(toJson(histo), equalTo("{\"scale\":2,\"sum\":1.1,\"zero\":{\"count\":7}}"));
histo.setMin(0);
assertThat(toJson(histo), equalTo("{\"scale\":2,\"sum\":1.1,\"min\":0.0,\"zero\":{\"count\":7}}"));
}

public void testOnlyPositiveBuckets() {
FixedCapacityExponentialHistogram histo = createAutoReleasedHistogram(10);
histo.resetBuckets(4);
histo.setSum(1.1);
histo.setMin(0.5);
histo.tryAddBucket(-1, 3, true);
histo.tryAddBucket(2, 5, true);
assertThat(toJson(histo), equalTo("{\"scale\":4,\"sum\":1.1,\"positive\":{\"indices\":[-1,2],\"counts\":[3,5]}}"));
assertThat(toJson(histo), equalTo("{\"scale\":4,\"sum\":1.1,\"min\":0.5,\"positive\":{\"indices\":[-1,2],\"counts\":[3,5]}}"));
}

public void testOnlyNegativeBuckets() {
FixedCapacityExponentialHistogram histo = createAutoReleasedHistogram(10);
histo.resetBuckets(5);
histo.setSum(1.1);
histo.setMin(-0.5);
histo.tryAddBucket(-1, 4, false);
histo.tryAddBucket(2, 6, false);
assertThat(toJson(histo), equalTo("{\"scale\":5,\"sum\":1.1,\"negative\":{\"indices\":[-1,2],\"counts\":[4,6]}}"));
assertThat(toJson(histo), equalTo("{\"scale\":5,\"sum\":1.1,\"min\":-0.5,\"negative\":{\"indices\":[-1,2],\"counts\":[4,6]}}"));
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we have an example here where sum and min are calculated from the histogram buckets?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, these tests are only about serialization and don't care about semantic correctness.
For the correctness of aggregates we have tests in ExponentialHistogramMergerTests.java.


private static String toJson(ExponentialHistogram histo) {
Expand Down
Loading