Skip to content
3 changes: 2 additions & 1 deletion .github/workflows/test-type-lint.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ jobs:
run: |
source activate ./ci_env
pip install -e .[dev]
pip install scikit-learn lightning # for docs

- name: Print installed packages
run: |
Expand Down Expand Up @@ -76,7 +77,7 @@ jobs:
sed -i 's/\"auto\"/None/g' README.md
# on Mac: sed -i '' 's/cluster: slurm/cluster: null/g' infra/*.md
# check readmes
pytest --markdown-docs -m markdown-docs `**/*.md`
pytest --markdown-docs -m markdown-docs .

- name: Run basic pylint
run: |
Expand Down
74 changes: 74 additions & 0 deletions docs/infra/example_sklearn.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
"""
A minimalist example with sklearn to show how to develop and explore a model with exca.
"""
import typing as tp
import numpy as np
import pydantic
import sys
import exca
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error


class Dataset(pydantic.BaseModel):
    """Configuration of a synthetic regression dataset and its train/test split."""

    # Number of samples requested from make_regression
    n_samples: int = 100
    # Noise level forwarded to make_regression
    noise: float = 0.1
    # Seed shared by the data generation and the train/test split
    random_state: int = 42
    # Test-set size forwarded to train_test_split
    test_size: float = 0.2
    # Reject unknown fields so that mistyped parameters raise at validation
    model_config = pydantic.ConfigDict(extra="forbid")

    def get(self) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
        """Generate the data and return ``(X_train, X_test, y_train, y_test)``."""
        # Synthetic regression problem driven by the configured parameters
        features, targets = make_regression(
            n_samples=self.n_samples,
            noise=self.noise,
            random_state=self.random_state,
        )
        # Hold out a test set, reusing the same seed as for generation
        train_x, test_x, train_y, test_y = train_test_split(
            features,
            targets,
            test_size=self.test_size,
            random_state=self.random_state,
        )
        return train_x, test_x, train_y, test_y


class Model(pydantic.BaseModel):
    """Ridge regression experiment: a dataset configuration plus model hyperparameters.

    Unknown fields are forbidden (as for ``Dataset``) so that a mistyped
    parameter name raises a validation error instead of being silently
    ignored and replaced by the default value.
    """

    # Dataset configuration used for training and evaluation
    data: Dataset = Dataset()
    # Hyperparameters forwarded to sklearn's Ridge
    alpha: float = 1.0
    max_iter: int = 1000
    # exca infrastructure applied to `score`, configured with the '.cache/' folder
    infra: exca.TaskInfra = exca.TaskInfra(folder='.cache/')
    # Reject unknown fields, consistently with Dataset
    model_config = pydantic.ConfigDict(extra="forbid")

    @infra.apply
    def score(self) -> float:
        """Fit a Ridge model on the training split and return the test-set MSE.

        The call goes through ``infra.apply``.
        NOTE(review): presumably exca caches the result under the infra
        folder -- confirm against the exca documentation.
        """
        # Get data
        X_train, X_test, y_train, y_test = self.data.get()

        # Train a Ridge regression model
        print('Fit...')
        model = Ridge(alpha=self.alpha, max_iter=self.max_iter)
        model.fit(X_train, y_train)

        # Evaluate on the held-out split
        print('Score...')
        y_pred = model.predict(X_test)
        return mean_squared_error(y_test, y_pred)


if __name__ == "__main__":
    # Build the configuration from the command-line arguments and let
    # pydantic validate it while instantiating the model
    cli_args = sys.argv[1:]
    model = Model(**exca.ConfDict.from_args(cli_args))
    print(model.infra.config)

    # Compute the score and report it
    print(model.score())
Loading