Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,5 @@ build/
.coverage
.pytest_cache/
.ruff_cache/
.env
config.yaml
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,16 @@ All notable changes to this project will be documented in this file.

The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).

## [0.1.2] - 2026-02-27

### Added
- `config-example.yaml` with example project configuration (`config.yaml` is gitignored)
- `src/docproc/config.py` — typed configuration loader with Pydantic models
- Environment variable substitution (`${VAR}`) with dotenv support
- Path resolution against project root
- Singleton caching for configuration
- Configuration validation (watch dir exists, recipients non-empty, API key set)

## [0.1.1] - 2026-02-27

### Added
Expand Down
15 changes: 15 additions & 0 deletions config-example.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
directories:
  watch: "./inbox"
  output: "./output"

deepfellow:
  base_url: "http://localhost:8000"
  responses_endpoint: "/v1/responses"
  api_key: "${DEEPFELLOW_API_KEY}"
  vision_model: "gpt-4-vision"
  llm_model: "deepseek"
  rag_collection: "documents"

recipients:
  - name: "Piotr Zalewa"
    tags: ["aquarium", "fish", "reef"]
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "docproc"
version = "0.1.1"
version = "0.1.2"
requires-python = ">=3.14"
dependencies = [
"watchdog>=4.0.0",
Expand Down
2 changes: 1 addition & 1 deletion src/docproc/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.1.1"
__version__ = "0.1.2"
199 changes: 199 additions & 0 deletions src/docproc/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
"""Configuration loader for PaperRoute.

Reads config.yaml, substitutes environment variables, resolves paths,
validates constraints, and caches the result as a singleton.
"""

import os
import re
from pathlib import Path

import yaml
from dotenv import load_dotenv
from pydantic import BaseModel, ConfigDict, Field, ValidationError, field_validator

# Matches ``${NAME}`` placeholders; group 1 captures NAME, which is one or
# more characters other than ``}``.
_ENV_VAR_PATTERN = re.compile(r"\$\{([^}]+)\}")


class DirectoriesConfig(BaseModel):
    """Immutable pair of filesystem paths: the watch and output directories."""

    # frozen=True makes instances read-only once constructed.
    model_config = ConfigDict(frozen=True)

    watch: Path
    output: Path


class DeepfellowConfig(BaseModel):
    """Immutable connection and model settings for the Deepfellow service.

    Every field except ``api_key`` must contain at least one character;
    ``api_key`` is instead rejected when it is empty or whitespace-only.
    """

    # frozen=True makes instances read-only once constructed.
    model_config = ConfigDict(frozen=True)

    base_url: str = Field(min_length=1)
    responses_endpoint: str = Field(min_length=1)
    api_key: str
    vision_model: str = Field(min_length=1)
    llm_model: str = Field(min_length=1)
    rag_collection: str = Field(min_length=1)

    @field_validator("api_key")
    @classmethod
    def api_key_must_not_be_blank(cls, v: str) -> str:
        """Return ``v`` unchanged, or raise ``ValueError`` when it is blank."""
        if v.strip():
            return v
        msg = "API key must not be blank"
        raise ValueError(msg)


class Recipient(BaseModel):
    """Immutable recipient entry: a non-empty name plus at least one tag."""

    # frozen=True makes instances read-only once constructed.
    model_config = ConfigDict(frozen=True)

    name: str = Field(min_length=1)
    tags: tuple[str, ...] = Field(min_length=1)

    @field_validator("tags")
    @classmethod
    def tags_must_not_contain_blanks(cls, v: tuple[str, ...]) -> tuple[str, ...]:
        """Return ``v`` unchanged, or raise ``ValueError`` if any tag is blank."""
        if any(not tag.strip() for tag in v):
            msg = "Tags must not contain blank strings"
            raise ValueError(msg)
        return v


class Config(BaseModel):
    """Immutable top-level configuration model.

    Combines the ``directories`` and ``deepfellow`` sections with a tuple of
    recipients that must contain at least one entry.
    """

    # frozen=True makes instances read-only once constructed.
    model_config = ConfigDict(frozen=True)

    directories: DirectoriesConfig
    deepfellow: DeepfellowConfig
    recipients: tuple[Recipient, ...] = Field(min_length=1)


# Module-level cache for the loaded configuration: ``None`` until
# ``load_config`` stores a ``Config`` here; cleared again by ``_reset_config``.
_config: Config | None = None


def _substitute_env_vars(value: str) -> str:
    """Expand every ``${VAR}`` placeholder in ``value`` from the environment.

    Raises:
        ValueError: If a referenced environment variable is not set.
    """

    def lookup(found: re.Match[str]) -> str:
        var_name = found.group(1)
        resolved = os.environ.get(var_name)
        if resolved is None:
            msg = f"Environment variable '{var_name}' is not set"
            raise ValueError(msg)
        return resolved

    return _ENV_VAR_PATTERN.sub(lookup, value)


def _process_env_vars(data: object) -> object:
"""Recursively walk parsed YAML and substitute env vars in strings."""
if isinstance(data, str):
return _substitute_env_vars(data)
if isinstance(data, dict):
return {k: _process_env_vars(v) for k, v in data.items()}
if isinstance(data, list):
return [_process_env_vars(item) for item in data]
return data


def _find_project_root() -> Path:
    """Return the nearest ancestor directory that contains ``config.yaml``.

    The search starts at the directory holding this module and walks upward
    through every parent, including the filesystem root.

    Raises:
        FileNotFoundError: If no searched directory holds a ``config.yaml``
            file.
    """
    start = Path(__file__).resolve().parent
    # ``Path.parents`` ends with the filesystem root, so the root itself is
    # searched as well instead of being skipped by a loop-termination check.
    for candidate in (start, *start.parents):
        # Require a regular file, matching the ``is_file()`` check that
        # ``load_config`` applies before reading the configuration.
        if (candidate / "config.yaml").is_file():
            return candidate
    msg = "Could not find config.yaml in any parent directory"
    raise FileNotFoundError(msg)


def _resolve_paths(config: Config, root: Path) -> Config:
    """Return a copy of ``config`` whose directory paths are anchored at ``root``.

    Relative ``watch``/``output`` paths are joined onto ``root`` and resolved;
    absolute paths are carried over untouched.
    """

    def anchored(path: Path) -> Path:
        return path if path.is_absolute() else (root / path).resolve()

    dirs = config.directories
    resolved_dirs = dirs.model_copy(
        update={"watch": anchored(dirs.watch), "output": anchored(dirs.output)}
    )
    return config.model_copy(update={"directories": resolved_dirs})


def _validate_config(config: Config) -> None:
    """Check constraints that depend on the filesystem rather than the schema.

    Raises:
        FileNotFoundError: If the watch path does not exist.
        NotADirectoryError: If the watch path exists but is not a directory.
    """
    watch = config.directories.watch
    if not watch.exists():
        msg = f"Watch directory does not exist: {config.directories.watch}"
        raise FileNotFoundError(msg)
    # ``exists()`` is also true for regular files, so confirm that the path
    # really is a directory before accepting it.
    if not watch.is_dir():
        msg = f"Watch path is not a directory: {watch}"
        raise NotADirectoryError(msg)


def load_config(config_path: Path | None = None) -> Config:
    """Build the configuration from YAML and cache it for later calls.

    Args:
        config_path: Explicit YAML file to load. When omitted, ``config.yaml``
            is located via ``_find_project_root``.

    Returns:
        The cached configuration if one exists and ``config_path`` is omitted;
        otherwise the newly loaded configuration, which is then cached.

    Raises:
        RuntimeError: If a configuration is already cached and ``config_path``
            is given.
        FileNotFoundError: If the configuration file cannot be found, or if
            ``_validate_config`` rejects the watch directory.
        ValueError: If the file cannot be read or parsed, is not a mapping,
            references an unset environment variable, or fails validation.
    """
    global _config

    # Serve from the cache; an explicit path at this point is treated as an
    # attempted reload and refused.
    if _config is not None:
        if config_path is not None:
            msg = "Configuration is already loaded. Call _reset_config() first to reload."
            raise RuntimeError(msg)
        return _config

    # Populate the environment from a dotenv file before substitution runs.
    load_dotenv()

    config_path = (
        _find_project_root() / "config.yaml"
        if config_path is None
        else config_path.resolve()
    )
    # Relative directory entries are resolved against the config's folder.
    root = config_path.parent

    if not config_path.is_file():
        msg = f"Configuration file not found: {config_path}"
        raise FileNotFoundError(msg)

    try:
        text = config_path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as e:
        msg = f"Failed to read configuration file {config_path}: {e}"
        raise ValueError(msg) from e

    try:
        raw = yaml.safe_load(text)
    except yaml.YAMLError as e:
        msg = f"Failed to parse configuration file {config_path}: {e}"
        raise ValueError(msg) from e

    # An empty document parses to None; anything but a mapping is unusable.
    if not isinstance(raw, dict):
        msg = f"Configuration file is empty or invalid: {config_path}"
        raise ValueError(msg)

    try:
        processed = _process_env_vars(raw)
    except ValueError as e:
        msg = f"Invalid configuration in {config_path}: {e}"
        raise ValueError(msg) from e

    try:
        config = Config.model_validate(processed)
    except ValidationError as e:
        msg = f"Invalid configuration in {config_path}: {e}"
        raise ValueError(msg) from e

    resolved = _resolve_paths(config, root)
    _validate_config(resolved)

    # Cache only after every check has passed.
    _config = resolved
    return resolved


def get_config() -> Config:
    """Return the cached configuration, calling ``load_config`` on first use."""
    return load_config() if _config is None else _config


def _reset_config() -> None:
    """Drop the cached configuration so the next load starts fresh (test helper)."""
    global _config
    _config = None
Loading