A Go implementation of Histogram-Based Outlier Score (HBOS) for anomaly detection.
HBOS is an efficient unsupervised anomaly detection algorithm that assumes feature independence and calculates the degree of outlyingness by building histograms. This implementation is based on the PyOD Python library.
- Fast: O(n) training and inference time complexity
- Simple: Easy to understand histogram-based approach
- Explainable: Provides per-feature contribution scores to explain why a sample is anomalous
- Flexible: Works with both numerical and categorical data
go get github.com/MohammadMdv/go-hbos

package main
import (
"fmt"
hbos "github.com/MohammadMdv/go-hbos"
)
func main() {
// Create trainer with parameters
trainer, _ := hbos.NewHBOSTrainer(
10, // n_bins: number of histogram bins
0.1, // alpha: regularizer to prevent log(0)
0.5, // tol: tolerance for values outside bins
0.1, // contamination: expected proportion of outliers
)
// Training data (numerical features)
X_train := [][]float64{
{1.0, 2.0},
{1.1, 2.1},
{1.2, 2.2},
{10.0, 20.0}, // outlier
}
columns := []string{"feature_1", "feature_2"}
// Train the model
model, metrics, _ := trainer.Train(X_train, columns)
fmt.Printf("Trained in %v with threshold: %.4f\n", metrics.Duration, metrics.Threshold)
// Score new samples
X_test := [][]float64{
{1.0, 2.0}, // normal
{10.0, 20.0}, // outlier
}
scores, _ := model.Score(X_test)
for i, score := range scores {
fmt.Printf("Sample %d: score=%.4f, anomaly=%v\n", i, score, score > model.Threshold)
}
}

package main
import (
"fmt"
hbos "github.com/MohammadMdv/go-hbos"
)
func main() {
// Create trainer for categorical data
trainer, _ := hbos.NewHBOSFullTrainer(10, 0.1, 0.5, 0.1)
// Training data (categorical features)
trainData := hbos.Dataset{
Columns: []string{"browser", "os", "action"},
Rows: [][]string{
{"chrome", "windows", "login"},
{"chrome", "windows", "view"},
{"firefox", "linux", "login"},
{"safari", "macos", "view"},
{"unknown", "unknown", "delete"}, // potential anomaly
},
}
// Train the model
model, _, _ := trainer.Train(trainData)
// Score with explanation
testData := hbos.Dataset{
Columns: []string{"browser", "os", "action"},
Rows: [][]string{
{"chrome", "windows", "login"}, // normal
{"unknown", "unknown", "delete"}, // anomaly
},
}
results, _ := model.ScoreWithExplanation(testData)
for i, result := range results {
fmt.Printf("\nSample %d: score=%.4f, anomaly=%v\n", i, result.TotalScore, result.IsAnomaly)
// Show top contributing features
for _, contrib := range result.TopContributors(2) {
fmt.Printf(" - %s: contribution=%.4f\n", contrib.FeatureName, contrib.Score)
}
}
}

Creates and trains HBOS models on numerical data.
trainer, err := hbos.NewHBOSTrainer(nBins, alpha, tol, contamination)
model, metrics, err := trainer.Train(X, columns)

Parameters:
- nBins (int): Number of histogram bins (default: 10)
- alpha (float64): Regularizer for preventing log(0), must be in (0, 1) (default: 0.1)
- tol (float64): Tolerance for samples falling outside bins, must be in (0, 1) (default: 0.5)
- contamination (float64): Expected proportion of outliers, must be in (0, 0.5] (default: 0.1)
The trained model for numerical data.
scores, err := model.Score(X) // Get anomaly scores
labels, err := model.Predict(X) // Get binary labels (0=normal, 1=anomaly)
results, err := model.ScoreWithExplanation(X) // Get scores with per-feature breakdown

End-to-end training and inference for categorical data with automatic feature encoding.
trainer, err := hbos.NewHBOSFullTrainer(nBins, alpha, tol, contamination)
model, metrics, err := trainer.Train(dataset)
scores, err := model.Score(dataset)
results, err := model.ScoreWithExplanation(dataset)
report := model.GenerateReport(&results[0], topN)
fmt.Println(report.ExplainAnomaly())

The ScoreWithExplanation method returns detailed information about each sample:
type ScoredSample struct {
TotalScore float64
IsAnomaly bool
Contributions []FeatureContribution
}
type FeatureContribution struct {
FeatureName string
FeatureIndex int
Value float64
Score float64 // Higher = more anomalous
BinIndex int // Which histogram bin (-1 if outside)
IsOutsideBin bool // True if value fell outside all bins
}

Get top N contributors:
topContributors := result.TopContributors(3)

Generate human-readable report:
report := model.GenerateReport(&result, 3)
explanation := report.ExplainAnomaly()
// Output: "Sample is ANOMALOUS (score: 15.5, threshold: 10.0)
// Top contributing features:
// 1. browser: value=5.0, contribution=8.0
// 2. location: value=-1.0, contribution=5.0 (outside histogram bins)"

For custom preprocessing of categorical data:
// Ordinal encoding (A=0, B=1, C=2, ...)
encoder := hbos.NewFeatureExtractor()
numerical, err := encoder.FitTransform(dataset)
// Frequency encoding (rare values get lower numbers)
freqEncoder := hbos.NewFrequencyEncoder()
numerical, err := freqEncoder.FitTransform(dataset)
// Mixed encoding (categorical + numerical columns)
mixedEncoder := hbos.NewMixedEncoder([]string{"category_col"})
numerical, err := mixedEncoder.FitTransform(dataset)

// Save model
file, _ := os.Create("model.gob")
model.Save(file)
file.Close()
// Load model
file, _ := os.Open("model.gob")
loadedModel := &hbos.HBOSModel{}
loadedModel.Load(file)
file.Close()

HBOS works by:
- Building a histogram for each feature
- Computing the density of each histogram bin
- For each sample, finding which bin each feature value falls into
- Computing the outlier score as the negative sum of log-densities
- Higher scores indicate more anomalous samples (lower density regions)
The algorithm assumes feature independence, which makes it very fast but may miss anomalies that only appear in feature combinations.
- Goldstein, M. and Dengel, A., 2012. Histogram-based outlier score (hbos): A fast unsupervised anomaly detection algorithm. KI-2012: Poster and Demo Track, pp.59-63.
- PyOD: A Python Toolbox for Scalable Outlier Detection
MIT License