Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 39 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,42 @@ environments/dataset/data/sorting/2_boxes/*
environments/dataset/data/sorting/4_boxes/*
environments/dataset/data/sorting/6_boxes/*
environments/dataset/data/stacking/all_data/*
environments/dataset/data/stacking/vision_data/*
environments/dataset/data/stacking/vision_data/*

# Testing and Coverage
.pytest_cache/
.coverage
htmlcov/
coverage.xml
.tox/
.cache/
nosetests.xml
coverage/
*.cover
.hypothesis/

# Claude settings
.claude/*

# Virtual environments
venv/
env/
.venv/
.env/
ENV/
env.bak/
venv.bak/

# IDE files
.vscode/
*.sublime-project
*.sublime-workspace

# OS files
.DS_Store
Thumbs.db

# Build artifacts
build/
dist/
*.egg-info/
282 changes: 282 additions & 0 deletions poetry.lock

Large diffs are not rendered by default.

81 changes: 81 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"

[tool.poetry]
name = "behaviour-cloning-benchmarks"
version = "0.1.0"
description = "A comprehensive benchmarking framework for behaviour cloning algorithms"
authors = ["Your Name <your.email@example.com>"]
readme = "README.md"
packages = [{include = "agents"}, {include = "environments"}, {include = "simulation"}]

[tool.poetry.dependencies]
python = "^3.8"
# Add production dependencies here as needed

[tool.poetry.group.test.dependencies]
pytest = "^7.4.0"
pytest-cov = "^4.1.0"
pytest-mock = "^3.11.1"

[tool.poetry.scripts]
test = "pytest:main"
tests = "pytest:main"

[tool.pytest.ini_options]
testpaths = ["tests"]
python_files = ["test_*.py", "*_test.py"]
python_classes = ["Test*"]
python_functions = ["test_*"]
addopts = [
"--strict-markers",
"--strict-config",
"--verbose",
"--cov=agents",
"--cov=environments",
"--cov=simulation",
"--cov-report=term-missing",
"--cov-report=html:htmlcov",
"--cov-report=xml:coverage.xml",
"--cov-fail-under=80"
]
markers = [
"unit: Unit tests",
"integration: Integration tests",
"slow: Slow running tests"
]

[tool.coverage.run]
source = ["agents", "environments", "simulation"]
omit = [
"*/tests/*",
"*/test_*",
"*/__pycache__/*",
"*/venv/*",
"*/env/*",
"*/.venv/*",
"*/.env/*",
"*/setup.py",
"*/conftest.py",
"*/__init__.py"
]

[tool.coverage.report]
exclude_lines = [
"pragma: no cover",
"def __repr__",
"if self.debug:",
"if settings.DEBUG",
"raise AssertionError",
"raise NotImplementedError",
"if 0:",
"if __name__ == .__main__.:",
"class .*\\bProtocol\\):",
"@(abc\\.)?abstractmethod"
]
show_missing = true
precision = 2

[tool.coverage.html]
directory = "htmlcov"
Empty file added tests/__init__.py
Empty file.
227 changes: 227 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,227 @@
"""
Shared pytest fixtures for the behaviour cloning benchmarks test suite.
"""
import os
import tempfile
import shutil
from pathlib import Path
from typing import Dict, Any, Generator
import pytest
from unittest.mock import Mock, MagicMock


@pytest.fixture
def temp_dir() -> Generator[Path, None, None]:
    """Yield a fresh temporary directory, removing it once the test finishes.

    Returns:
        Path: Path to the temporary directory.
    """
    created = Path(tempfile.mkdtemp())
    try:
        yield created
    finally:
        # Best-effort cleanup; ignore_errors guards against entries the test
        # may already have removed or made unreadable.
        shutil.rmtree(created, ignore_errors=True)


@pytest.fixture
def temp_file(temp_dir: Path) -> Path:
    """Create a small text file inside the temporary directory fixture.

    Args:
        temp_dir: The temporary directory fixture.

    Returns:
        Path: Path to the created file.
    """
    # Named distinctly so the local does not shadow the fixture name.
    created_file = temp_dir / "test_file.txt"
    created_file.write_text("test content")
    return created_file


@pytest.fixture
def mock_config() -> Dict[str, Any]:
    """Provide a configuration dictionary with common test values.

    Returns:
        Dict: Mock configuration covering model, training, data and
        environment sections.
    """
    model_section = {"name": "test_model", "hidden_dim": 128, "num_layers": 2}
    training_section = {"batch_size": 32, "learning_rate": 0.001, "num_epochs": 10}
    data_section = {"sequence_length": 100, "normalize": True}
    environment_section = {"name": "test_env", "max_steps": 1000}
    return {
        "model": model_section,
        "training": training_section,
        "data": data_section,
        "environment": environment_section,
    }


@pytest.fixture
def mock_device():
    """Return a device identifier usable on any machine (no GPU required).

    Returns:
        str: The literal device string ``"cpu"``.
    """
    return "cpu"


@pytest.fixture
def mock_agent():
    """Build a MagicMock standing in for an agent.

    The mock's predict/train/save/load methods return canned values so tests
    can exercise agent interactions without a real model.

    Returns:
        Mock: Mock agent with common methods stubbed.
    """
    agent = MagicMock()
    agent.configure_mock(
        **{
            "predict.return_value": [0.5, 0.3, 0.2],  # canned action
            "train.return_value": {"loss": 0.1, "accuracy": 0.95},
            "save.return_value": True,
            "load.return_value": True,
        }
    )
    return agent


@pytest.fixture
def mock_dataset():
    """Build a MagicMock standing in for a dataset.

    Returns:
        Mock: Mock dataset reporting 100 items, every index yielding the
        same fixed sample dict.
    """
    sample = {
        "observations": [1.0, 2.0, 3.0],
        "actions": [0.1, 0.2, 0.3],
        "rewards": 1.0,
    }
    dataset = MagicMock()
    dataset.__len__.return_value = 100
    dataset.__getitem__.return_value = sample
    return dataset


@pytest.fixture
def mock_environment():
    """Build a MagicMock exposing a gym-style environment interface.

    Returns:
        Mock: Mock environment whose ``step`` yields the classic 4-tuple
        ``(observation, reward, done, info)``.
    """
    env = MagicMock()
    env.reset.return_value = [0.0, 0.0, 0.0]
    # Old-style gym step contract: (obs, reward, done, info).
    env.step.return_value = ([0.1, 0.1, 0.1], 1.0, False, {})
    env.close.return_value = None
    return env


@pytest.fixture
def sample_observation():
    """Provide a fixed five-element observation vector.

    Returns:
        list: Sample observation vector.
    """
    return [0.1, 0.2, 0.3, 0.4, 0.5]


@pytest.fixture
def sample_action():
    """Provide a fixed one-hot-style action vector.

    Returns:
        list: Sample action vector.
    """
    return [0.0, 1.0, 0.0]


@pytest.fixture(scope="session")
def test_data_dir() -> Path:
    """Locate the directory holding on-disk test data files.

    Session-scoped, so the path is resolved once per test run.

    Returns:
        Path: The ``data`` directory next to this conftest module.
    """
    return Path(__file__).parent / "data"


@pytest.fixture
def clean_environment():
    """Run a test with a sanitised ``os.environ``, restoring it afterwards.

    Removes variables known to interfere with tests (CUDA device selection,
    PYTHONPATH) before the test and puts the original environment back on
    teardown.
    """
    saved = dict(os.environ)
    # Drop variables that could leak host configuration into the test.
    for name in ("CUDA_VISIBLE_DEVICES", "PYTHONPATH"):
        os.environ.pop(name, None)

    try:
        yield
    finally:
        os.environ.clear()
        os.environ.update(saved)


@pytest.fixture
def disable_gpu():
    """Hide all CUDA devices for the duration of a test.

    Sets ``CUDA_VISIBLE_DEVICES`` to the empty string and restores the full
    environment on teardown.
    """
    saved = dict(os.environ)
    os.environ["CUDA_VISIBLE_DEVICES"] = ""

    try:
        yield
    finally:
        os.environ.clear()
        os.environ.update(saved)


# Pytest configuration hooks
def pytest_configure(config):
    """Register the custom markers so ``--strict-markers`` accepts them."""
    marker_lines = (
        "unit: mark test as a unit test",
        "integration: mark test as an integration test",
        "slow: mark test as slow running",
    )
    for line in marker_lines:
        config.addinivalue_line("markers", line)


def pytest_collection_modifyitems(config, items):
    """Automatically mark tests based on the directory they live in.

    Tests under a ``unit`` directory receive the ``unit`` marker and tests
    under an ``integration`` directory receive the ``integration`` marker,
    matching the markers registered in ``pytest_configure``.

    Args:
        config: The pytest config object (unused, required by the hook).
        items: Collected test items, mutated in place by adding markers.
    """
    for item in items:
        # Match whole path components rather than substrings so that, e.g.,
        # a checkout under /home/unit42/ does not mark every test as "unit",
        # which would also prevent the "integration" branch from ever firing.
        parts = Path(str(item.fspath)).parts
        if "unit" in parts:
            item.add_marker(pytest.mark.unit)
        elif "integration" in parts:
            item.add_marker(pytest.mark.integration)
Empty file added tests/integration/__init__.py
Empty file.
25 changes: 25 additions & 0 deletions tests/integration/test_sample_integration.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
"""
Sample integration test to demonstrate integration test structure.
"""
import pytest


@pytest.mark.integration
class TestSampleIntegration:
    """Sample class demonstrating the integration-test layout."""

    def test_basic_integration(self):
        """Smoke test: the integration suite itself is wired up."""
        assert True

    def test_with_multiple_fixtures(self, mock_agent, mock_environment):
        """Drive one mocked agent/environment interaction cycle."""
        observation = mock_environment.reset()
        chosen_action = mock_agent.predict(observation)
        next_observation, reward, done, info = mock_environment.step(chosen_action)

        assert observation == [0.0, 0.0, 0.0]
        assert chosen_action == [0.5, 0.3, 0.2]
        assert next_observation == [0.1, 0.1, 0.1]
        assert reward == 1.0
Loading