using System.Collections.Frozen;
using System.Diagnostics.Metrics;
using System.Runtime.CompilerServices;
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Running;
using OpenTelemetry;
using OpenTelemetry.Metrics;
using Microsoft.Extensions.Diagnostics.Metrics;
// Program entry point (top-level statement): hands the Bench class to BenchmarkDotNet's runner.
BenchmarkRunner.Run<Bench>();
/// <summary>
/// Tag set passed to the typed counter declared with <c>typeof(RequestTags)</c> in <c>Metrics</c>.
/// Every member is a <see cref="string"/>; each one carries a <c>TagName</c> attribute giving the tag key
/// it is associated with.
/// </summary>
/// <param name="Method">Value for the <c>http.request.method</c> tag.</param>
/// <param name="Status">Value for the <c>http.response.status_code</c> tag (as text).</param>
/// <param name="Route">Value for the <c>http.route</c> tag.</param>
/// <param name="Port">Value for the <c>server.port</c> tag (as text).</param>
internal readonly record struct RequestTags(
[property: TagName("http.request.method")] string Method,
[property: TagName("http.response.status_code")] string Status,
[property: TagName("http.route")] string Route,
[property: TagName("server.port")] string Port);
/// <summary>
/// Declares the two attribute-described counter factories compared by the benchmark. Both methods are
/// <c>partial</c> with no body in this file; the implementations and the <c>TotalCount1</c> /
/// <c>TotalCount2</c> types are presumably emitted by the metrics source generator (confirm against the build).
/// </summary>
internal static partial class Metrics
{
// Counter named "http.server.requests" whose four tag names are listed individually as strings.
[Counter<long>(
"http.request.method",
"http.response.status_code",
"http.route",
"server.port",
Name = "http.server.requests")]
public static partial TotalCount1 CreateRequestsCounter1(Meter meter);
// Same counter name, but the tags are described by the RequestTags type instead of a string list.
[Counter<long>(typeof(RequestTags), Name = "http.server.requests")]
public static partial TotalCount2 CreateRequestsCounter2(Meter meter);
}
/// <summary>
/// Non-generic companion of <see cref="DynamicInstrument{TInstrument,T1,T2,T3,T4}"/> so that callers can
/// write <c>DynamicInstrument.Create(...)</c> and let the type arguments be inferred from the factory delegate.
/// </summary>
internal static class DynamicInstrument
{
    /// <summary>
    /// Wraps <paramref name="factory"/> in a new <see cref="DynamicInstrument{TInstrument,T1,T2,T3,T4}"/>.
    /// </summary>
    /// <param name="factory">Delegate that builds an instrument from the four key values.</param>
    /// <returns>A new, empty instrument cache bound to <paramref name="factory"/>.</returns>
    public static DynamicInstrument<TInstrument, T1, T2, T3, T4> Create<TInstrument, T1, T2, T3, T4>(
        Func<T1, T2, T3, T4, TInstrument> factory)
        where TInstrument : Instrument
        => new(factory);
}
/// <summary>
/// Caches one <typeparamref name="TInstrument"/> per distinct combination of four key values and creates
/// missing instruments on demand through a caller-supplied factory.
/// </summary>
/// <remarks>
/// The cache is expected to receive a handful of writes at startup and then only reads, so it is stored as a
/// <see cref="FrozenDictionary{TKey,TValue}"/> that is rebuilt on every insertion. Insertions are serialized
/// by a lock so that the factory runs at most once per key (a factory that registers an instrument with a
/// meter must not run twice for the same tags); lookups of keys that are already cached never take the lock.
/// </remarks>
internal sealed class DynamicInstrument<TInstrument, T1, T2, T3, T4>
    where TInstrument : Instrument
{
    private readonly Func<T1, T2, T3, T4, TInstrument> _factory;

    // Guards the miss path only; the hit path reads _cache without locking.
    private readonly object _writeLock = new();

    private FrozenDictionary<InstrumentKey, TInstrument> _cache = FrozenDictionary<InstrumentKey, TInstrument>.Empty;

    /// <summary>Creates an empty cache that builds instruments with <paramref name="factory"/>.</summary>
    /// <param name="factory">Delegate that builds an instrument from the four key values.</param>
    /// <exception cref="ArgumentNullException"><paramref name="factory"/> is <see langword="null"/>.</exception>
    public DynamicInstrument(Func<T1, T2, T3, T4, TInstrument> factory)
    {
        ArgumentNullException.ThrowIfNull(factory);
        _factory = factory;
    }

    /// <summary>
    /// Gets the instrument associated with the given key values, creating and caching it on first use.
    /// </summary>
    public TInstrument this[T1 t1, T2 t2, T3 t3, T4 t4]
    {
        get
        {
            InstrumentKey key = new(t1, t2, t3, t4);

            // Fast path. A stale snapshot can only produce a miss, which is re-checked under the lock.
            if (_cache.TryGetValue(key, out var instrument))
            {
                return instrument;
            }

            return GetOrCreate(key);
        }
    }

    /// <summary>
    /// Slow path: creates the instrument for <paramref name="key"/> unless another thread already did,
    /// then publishes a new frozen dictionary containing it.
    /// </summary>
    private TInstrument GetOrCreate(InstrumentKey key)
    {
        lock (_writeLock)
        {
            // Another thread may have inserted the key while this one was waiting for the lock.
            if (_cache.TryGetValue(key, out var existing))
            {
                return existing;
            }

            var created = _factory(key.Item1, key.Item2, key.Item3, key.Item4);

            // Re-read the field after the factory call: a re-entrant factory on this thread may have
            // inserted other keys, and those entries must not be dropped.
            var rebuilt = _cache.Append(KeyValuePair.Create(key, created)).ToFrozenDictionary();
            Volatile.Write(ref _cache, rebuilt);
            return created;
        }
    }

    /// <summary>Value-type composite key made of the four indexer arguments.</summary>
    private readonly struct InstrumentKey(T1 item1, T2 item2, T3 item3, T4 item4)
        : IEquatable<InstrumentKey>
    {
        public readonly T1 Item1 = item1;
        public readonly T2 Item2 = item2;
        public readonly T3 Item3 = item3;
        public readonly T4 Item4 = item4;

        /// <summary>Compares all four components with their default equality comparers.</summary>
        public bool Equals(InstrumentKey other) =>
            EqualityComparer<T1>.Default.Equals(Item1, other.Item1)
            && EqualityComparer<T2>.Default.Equals(Item2, other.Item2)
            && EqualityComparer<T3>.Default.Equals(Item3, other.Item3)
            && EqualityComparer<T4>.Default.Equals(Item4, other.Item4);

        public override bool Equals(object? obj) => obj is InstrumentKey other && Equals(other);

        public override int GetHashCode() => HashCode.Combine(Item1, Item2, Item3, Item4);
    }
}
/// <summary>
/// A counter whose tags are bound at creation time. Unlike <see cref="Counter{T}"/>,
/// <see cref="BoundCounter{T}"/> only allows passing tags when it is constructed
/// (see <see href="https://github.com/open-telemetry/opentelemetry-dotnet/issues/5478"/>): increments are
/// accumulated in a field and exposed through an <see cref="ObservableCounter{T}"/> callback instead of being
/// reported on every call.
/// </summary>
/// <remarks>
/// Inspired by proposal https://github.com/open-telemetry/opentelemetry-specification/issues/4126.
/// Only <see cref="int"/> and <see cref="long"/> are supported by <see cref="Add"/>.
/// </remarks>
internal sealed class BoundCounter<T> : Instrument<T> where T : struct
{
    // Observable instrument created on the meter; not read anywhere in this class.
    private readonly ObservableCounter<T> _counter;

    // Running total. Written only through Interlocked in Add, read through ReadValue.
    private T _value;

    /// <summary>
    /// Creates the bound counter and an observable counter of the same name on <paramref name="meter"/>
    /// that reports the accumulated total together with <paramref name="tags"/>.
    /// </summary>
    /// <param name="meter">Meter that owns the observable counter.</param>
    /// <param name="name">Instrument name.</param>
    /// <param name="unit">Optional unit.</param>
    /// <param name="description">Optional description.</param>
    /// <param name="tags">Tags attached to every reported measurement.</param>
    public BoundCounter(
        Meter meter,
        string name,
        string? unit,
        string? description,
        IEnumerable<KeyValuePair<string, object?>>? tags)
        : base(meter, name, unit, description, tags)
    {
        _counter = meter.CreateObservableCounter(name, () => new Measurement<T>(ReadValue(), Tags), unit, description);
    }

    /// <summary>
    /// Record the increment value of the measurement.
    /// </summary>
    /// <param name="delta">The increment measurement.</param>
    /// <exception cref="NotSupportedException">
    /// <typeparamref name="T"/> is neither <see cref="int"/> nor <see cref="long"/>.
    /// </exception>
    public void Add(T delta)
    {
        if (typeof(T) == typeof(int))
        {
            Interlocked.Add(ref Unsafe.As<T, int>(ref _value), Unsafe.As<T, int>(ref delta));
        }
        else if (typeof(T) == typeof(long))
        {
            Interlocked.Add(ref Unsafe.As<T, long>(ref _value), Unsafe.As<T, long>(ref delta));
        }
        // byte, short, float, double, decimal could also be supported.
        else
        {
            throw new NotSupportedException();
        }
    }

    /// <summary>
    /// Reads the running total atomically, so that a concurrent <see cref="Add"/> can never be observed
    /// half-written (a plain 64-bit read is not guaranteed to be atomic on 32-bit runtimes).
    /// </summary>
    private T ReadValue()
    {
        if (typeof(T) == typeof(long))
        {
            long snapshot = Interlocked.Read(ref Unsafe.As<T, long>(ref _value));
            return Unsafe.As<long, T>(ref snapshot);
        }

        if (typeof(T) == typeof(int))
        {
            int snapshot = Volatile.Read(ref Unsafe.As<T, int>(ref _value));
            return Unsafe.As<int, T>(ref snapshot);
        }

        // Unsupported types are never modified (Add throws), so a plain read is sufficient.
        return _value;
    }
}
/// <summary>
/// Compares four ways of incrementing a counter that carries four tags (two strings, two ints):
/// a plain <see cref="Counter{T}"/>, the <c>DynamicInstrument</c>/<c>BoundCounter</c> pair, and the two
/// attribute-declared counters from <c>Metrics</c>. Each benchmark performs <see cref="Iterations"/> increments.
/// </summary>
[MemoryDiagnoser]
public class Bench
{
    private const int Iterations = 1000;

    private readonly List<Metric> _metrics = [];

    private Meter _meter = null!;
    private Counter<long> _requestsCounter = null!;
    private DynamicInstrument<BoundCounter<long>, string, int, string, int> _requestsDynamicCounter = null!;
    private TotalCount1 _sourceGeneratedRequestsCounter = null!;
    private TotalCount2 _sourceGeneratedRequestsTypedCounter = null!;
    private MeterProvider _meterProvider = null!;

    // Tag values are exposed as settable properties rather than constants.
    public string Method { get; set; } = "GET";
    public int StatusCode { get; set; } = 200;
    public string Route { get; set; } = "/users";
    public int Port { get; set; } = 443;

    /// <summary>
    /// Creates the meter, an OpenTelemetry provider exporting to an in-memory list, and the four instruments.
    /// </summary>
    [GlobalSetup]
    public void GlobalSetup()
    {
        _meter = new Meter("Benchmark");
        Meter meter = _meter;

        _meterProvider = Sdk.CreateMeterProviderBuilder()
            .AddMeter(meter.Name)
            .AddInMemoryExporter(_metrics)
            .Build();

        _requestsCounter = meter.CreateCounter<long>("http.server.requests", "{request}",
            "Number of HTTP server requests.");

        _requestsDynamicCounter = DynamicInstrument.Create((string method, int status, string route, int port) => new BoundCounter<long>(
            meter, "http.server.requests", null, null,
            [
                new KeyValuePair<string, object?>("http.request.method", method),
                new KeyValuePair<string, object?>("http.response.status_code", status),
                new KeyValuePair<string, object?>("http.route", route),
                new KeyValuePair<string, object?>("server.port", port),
            ]));

        _sourceGeneratedRequestsCounter = Metrics.CreateRequestsCounter1(meter);
        _sourceGeneratedRequestsTypedCounter = Metrics.CreateRequestsCounter2(meter);
    }

    /// <summary>
    /// Releases the provider and the meter created in <see cref="GlobalSetup"/>; both are disposable.
    /// </summary>
    [GlobalCleanup]
    public void GlobalCleanup()
    {
        // Null-conditional in case setup failed before the fields were assigned.
        _meterProvider?.Dispose();
        _meter?.Dispose();
    }

    /// <summary>Baseline: builds the four-tag collection on every increment.</summary>
    [Benchmark(Baseline = true, OperationsPerInvoke = Iterations)]
    public void Counter()
    {
        for (int i = 0; i < Iterations; i += 1)
        {
            _requestsCounter.Add(1, [
                new KeyValuePair<string, object?>("http.request.method", Method),
                new KeyValuePair<string, object?>("http.response.status_code", StatusCode),
                new KeyValuePair<string, object?>("http.route", Route),
                new KeyValuePair<string, object?>("server.port", Port),
            ]);
        }
    }

    /// <summary>Looks up the bound counter for the current tag values, then increments it.</summary>
    [Benchmark(OperationsPerInvoke = Iterations)]
    public void DynamicCounter()
    {
        for (int i = 0; i < Iterations; i += 1)
        {
            _requestsDynamicCounter[Method, StatusCode, Route, Port].Add(1);
        }
    }

    /// <summary>Increments the counter declared with a list of tag-name strings.</summary>
    [Benchmark(OperationsPerInvoke = Iterations)]
    public void SourceGeneratedCounter()
    {
        for (int i = 0; i < Iterations; i += 1)
        {
            _sourceGeneratedRequestsCounter.Add(1, Method, StatusCode, Route, Port);
        }
    }

    /// <summary>
    /// Increments the counter declared with <c>RequestTags</c>; the int values are converted with
    /// <c>ToString()</c> inside the loop on purpose, since that conversion is part of what is measured.
    /// </summary>
    [Benchmark(OperationsPerInvoke = Iterations)]
    public void SourceGeneratedTypedCounter()
    {
        for (int i = 0; i < Iterations; i += 1)
        {
            _sourceGeneratedRequestsTypedCounter.Add(1, new RequestTags(Method, StatusCode.ToString(), Route, Port.ToString()));
        }
    }
}
Problem
Instrument tag values are very often known only very late, which prevents pre-creating the tag collection. That pushes developers into writing code like this:
This is very expensive for simply incrementing a counter by one.
Can it really impact the performance of an application? Yes. We are just starting to look at OTEL for metrics; currently we use a loose Graphite API where the developer just calls `Increment("my.counter")`, and developers would obviously allocate strings in the hot path like this: `Increment($"my.counter.{x}.something.{y}")`, which results in performance similar to the OTEL example above. Our largest service was spending almost 1% of its CPU incrementing these counters. At our scale, this was a significant waste of money.
Existing Solutions
I recently found out about Compile-time metric source generation, but after trying it out, it seems to generate code that takes `object` as argument, forcing a boxing operation.
Alternatively, we can use Source-generated metrics with strongly-typed tags, but the problem is that it only accepts strings and enums. For other types, the developer might just slap a `.ToString()` in the hot path.
Additionally, neither of these APIs supports observable instruments, so each recording triggers the OTEL SDK listeners.
Benchmark
Here is a benchmark that compares a plain counter (Counter) vs. source-generated counters (SourceGeneratedCounter & SourceGeneratedTypedCounter) vs. a custom API that leverages ObservableCounter and FrozenDictionary (DynamicCounter).
The metric used in that benchmark is inspired by http.server.request.duration. It's an interesting one because it has 4 tags and 2 of them are ints.
It shows that source generators don't help, whereas the custom solution can be 50x faster and does not allocate any memory.
Interestingly enough, I thought most of the time was spent in the boxing operations, but if I replace the ints by strings I get this
So most of the time is probably spent in the OTEL SDK. However, these micro-benchmarks don't capture the pressure put on the GC. In our largest services we can have hundreds of counters incremented for a single request, which quickly adds up.
Benchmark
Potential Solution
It would be great to leverage observable instruments in the source-generated classes and avoid the boxing.
Before proposing an API, I would like to hear your input @tarekgh @jkotas @noahfalk @cijothomas