Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
// For format details, see https://aka.ms/devcontainer.json. For config options, see the
// README at: https://github.com/devcontainers/templates/tree/main/src/ubuntu
{
"name": "Ubuntu",
// Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
"image": "mcr.microsoft.com/devcontainers/base:jammy"

// Features to add to the dev container. More info: https://containers.dev/features.
// "features": {},

// Use 'forwardPorts' to make a list of ports inside the container available locally.
// "forwardPorts": [],

// Use 'postCreateCommand' to run commands after the container is created.
// "postCreateCommand": "uname -a",

// Configure tool-specific properties.
// "customizations": {},

// Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
// "remoteUser": "root"
}
26 changes: 26 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ SimpleAgent is designed with the belief that AI agents don't need to be complex
- **Easy to Extend**: Add new capabilities by creating new command modules
- **Change Summarization**: Automatically summarizes changes made using a cheaper GPT model
- **Modular Architecture**: Core components are separated into their own modules
- **Benchmarking System**: Comprehensive testing framework to verify command functionality

## Project Structure

Expand All @@ -34,8 +35,14 @@ SimpleAgent/
│ │ ├── write_file/
│ │ └── ...
│ └── ... # Other command categories
├── benchmark/ # Benchmark tests
│ ├── __init__.py # Benchmark package initialization
│ ├── test_framework.py # Test discovery and execution framework
│ ├── test_file_ops.py # Tests for file operations
│ └── ... # Tests for other command categories
├── output/ # Generated files and input files directory
├── SimpleAgent.py # Main entry point
├── status.md # Command status report
├── requirements.txt # Dependencies
└── .env # Environment variables (create from .env.example)
```
Expand Down Expand Up @@ -133,6 +140,25 @@ python SimpleAgent.py -a 10 "research and look into https://github.com/PyGithub/
python SimpleAgent.py -a 10 "please research the latest in stock and look at the top 10 stock prices and write them to a file called 'stock_prices.txt'"
```

## Running Benchmarks

SimpleAgent includes a comprehensive benchmark system to verify that all commands are working correctly.

To run the benchmark tests:

```bash
python SimpleAgent.py --benchmark
```

This will test all available commands and generate a status report in `status.md`.

To view the status report without running the tests:

```bash
python SimpleAgent.py --status
```

For more information about benchmarks, see the [benchmark README](SimpleAgent/benchmark/README.md).

## Adding New Commands

Expand Down
56 changes: 54 additions & 2 deletions SimpleAgent/SimpleAgent.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,13 @@
from core.agent import SimpleAgent
from core.config import OPENAI_API_KEY, MAX_STEPS

# Import benchmark modules
try:
from benchmark.test_framework import discover_and_run_tests, generate_status_markdown, save_status_file
BENCHMARK_AVAILABLE = True
except ImportError:
BENCHMARK_AVAILABLE = False

# Initialize commands
commands.init()

Expand All @@ -39,11 +46,54 @@ def main():
help='Auto-continue for N steps (default: 10 if no number provided)')
parser.add_argument('-m', '--max-steps', type=int, default=10,
help='Maximum number of steps to run (default: 10)')
parser.add_argument('instruction', nargs='+', help='The instruction for the AI agent')
parser.add_argument('-b', '--benchmark', action='store_true',
help='Run benchmark tests for all commands')
parser.add_argument('-s', '--status', action='store_true',
help='Generate a status report for all commands without running tests')
parser.add_argument('-o', '--output', default=None,
help='Output file path for status.md (default: SimpleAgent/status.md)')
parser.add_argument('instruction', nargs='*', help='The instruction for the AI agent')

# Parse arguments
args = parser.parse_args()

# Run benchmarks if requested
if args.benchmark:
if not BENCHMARK_AVAILABLE:
print("Error: Benchmark module not available. Please install it first.")
return 1

print("Running benchmark tests for all commands...")
results = discover_and_run_tests()
status_md = generate_status_markdown(results)
output_path = save_status_file(status_md, args.output)
print(f"Benchmark tests completed! Status file saved to: {output_path}")
return 0

# Generate status report if requested
if args.status:
if not BENCHMARK_AVAILABLE:
print("Error: Benchmark module not available. Please install it first.")
return 1

status_path = args.output or os.path.join(os.path.dirname(__file__), 'status.md')
if os.path.exists(status_path):
print(f"Status report is available at: {status_path}")
with open(status_path, 'r', encoding='utf-8') as f:
# Print summary section
for line in f:
print(line.strip())
if line.strip() == "## Command Status by Category":
break
else:
print(f"Status report not found. Run with --benchmark to generate it.")
return 0

# Ensure instruction is provided if not running benchmarks or status
if not args.instruction:
parser.print_help()
return 1

# Join the instruction parts back together
instruction = ' '.join(args.instruction)

Expand All @@ -53,7 +103,9 @@ def main():
# Initialize and run the agent
agent = SimpleAgent()
agent.run(instruction, max_steps=max_steps, auto_continue=args.auto)

return 0


if __name__ == "__main__":
main()
sys.exit(main())
87 changes: 87 additions & 0 deletions SimpleAgent/benchmark/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
# SimpleAgent Benchmark Tests

This directory contains benchmark tests for all SimpleAgent commands to ensure they work correctly. The tests are designed to be run periodically to verify that all functionality is working as expected.

## Running Benchmark Tests

There are several ways to run the benchmark tests:

### 1. Using SimpleAgent.py

The simplest way is to use the `--benchmark` flag with SimpleAgent.py:

```bash
python SimpleAgent.py --benchmark
```

This will run all tests and generate a status.md file with the results.

### 2. Using the Benchmark Runner

You can also use the dedicated benchmark runner script:

```bash
./benchmark/run_all.py
```

### 3. Using Individual Test Modules

You can run individual test modules using Python:

```bash
python -m benchmark.test_file_ops
python -m benchmark.test_web_ops
python -m benchmark.test_data_ops
python -m benchmark.test_github_ops
python -m benchmark.test_agent
```

## Viewing Test Results

After running the tests, you can view the results in the generated status.md file. This file contains a summary of all tests and their status.

You can also display a summary of the results using the `--status` flag:

```bash
python SimpleAgent.py --status
```

## Adding New Tests

To add tests for a new command, create a test function in the appropriate test module. The test function should:

1. Be named `test_commandname`
2. Return a tuple of (success, message)
3. Handle exceptions gracefully

Example:

```python
def test_new_command() -> Tuple[bool, str]:
"""Test the new_command command."""
try:
# Test code here
result = new_command(param1, param2)

# Verify the result
if not result:
return False, "Command failed"

return True, "Command successful"
except Exception as e:
return False, f"Exception: {str(e)}"
```

## Test Environment

Tests run in a dedicated test environment with:

- A clean test directory (`TEST_OUTPUT_DIR`)
- Mocked network calls to avoid actual web requests
- Mocked GitHub API calls to avoid actual GitHub operations

This ensures tests can run without external dependencies or side effects.

## Failed Tests

If a test fails, it will be marked as "Failed" in the status.md file. You can fix the issue and run the tests again to update the status.
6 changes: 6 additions & 0 deletions SimpleAgent/benchmark/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
"""
Benchmark package for SimpleAgent.

This package contains benchmark tests for all SimpleAgent commands to ensure
they work correctly. It also generates a status.md file for tracking command status.
"""
60 changes: 60 additions & 0 deletions SimpleAgent/benchmark/run_all.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#!/usr/bin/env python3
"""
Run all SimpleAgent benchmark tests.

This script is a simple shortcut to run benchmark tests for all SimpleAgent commands
and generate a status.md file with the results.
"""

import os
import sys

# Add parent directory to sys.path
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

# Import test framework
from benchmark.test_framework import (
discover_and_run_tests,
generate_status_markdown,
save_status_file
)

if __name__ == "__main__":
    print("Running SimpleAgent benchmark tests...")

    # Run every discovered test; results are keyed as
    # results[category][command] -> {'status': ..., ...}.
    results = discover_and_run_tests()

    # Render the results as markdown and persist them (default: status.md).
    status_md = generate_status_markdown(results)
    output_path = save_status_file(status_md)

    print("\nBenchmark tests completed!")
    print(f"Status file saved to: {output_path}")

    # Flatten the nested mapping into one list of status strings, then
    # tally with list.count instead of a hand-rolled counter loop.
    statuses = [
        result['status']
        for category_results in results.values()
        for result in category_results.values()
    ]
    total = len(statuses)
    working = statuses.count('Working')
    failed = statuses.count('Failed')
    # Anything neither 'Working' nor 'Failed' counts as not tested.
    not_tested = total - working - failed

    print("\nSummary:")
    print(f"- Total Commands: {total}")
    if total > 0:
        # Guarded so an empty command set cannot divide by zero.
        print(f"- Working: {working} ({working/total*100:.1f}%)")
        print(f"- Failed: {failed} ({failed/total*100:.1f}%)")
        print(f"- Not Tested: {not_tested} ({not_tested/total*100:.1f}%)")
    else:
        print("- No commands found to test")
52 changes: 52 additions & 0 deletions SimpleAgent/benchmark/run_benchmarks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
"""
Run benchmarks for SimpleAgent commands.

This script runs tests for all SimpleAgent commands and
generates a status.md file with the results.
"""

import os
import sys
import argparse

from benchmark.test_framework import (
discover_and_run_tests,
generate_status_markdown,
save_status_file
)

def main():
    """Run benchmark tests for all SimpleAgent commands and write status.md.

    Returns:
        int: 0 on success, 1 if the test run raised an exception
        (suitable for passing to sys.exit).
    """
    parser = argparse.ArgumentParser(description='Run benchmark tests for SimpleAgent commands.')
    parser.add_argument('--output', '-o', default=None,
                        help='Output file path for status.md (default: SimpleAgent/status.md)')
    parser.add_argument('--verbose', '-v', action='store_true',
                        help='Enable verbose output')

    args = parser.parse_args()

    # NOTE(review): this runs only after the module-level
    # `from benchmark.test_framework import ...` has already executed, so it
    # cannot make that import succeed — confirm whether it is still needed.
    sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

    print("Starting SimpleAgent benchmark tests...")

    try:
        # Run every discovered test; results are keyed as
        # results[category][command] -> {'status': ...}.
        results = discover_and_run_tests()

        if args.verbose:
            # --verbose used to be accepted but ignored; echo each command's
            # status so the flag actually does something.
            for category, commands in results.items():
                print(f"\n{category}:")
                for cmd, result in commands.items():
                    print(f"  {cmd}: {result['status']}")

        # Render the results as markdown and persist them.
        status_md = generate_status_markdown(results)
        output_path = save_status_file(status_md, args.output)

        print("\nBenchmark tests completed!")
        print(f"Status file saved to: {output_path}")

        return 0

    except Exception as e:
        # Broad catch is deliberate: this is the top-level CLI boundary, and
        # any failure should yield a message and exit code 1, not a traceback.
        print(f"Error running benchmark tests: {e}")
        return 1

if __name__ == "__main__":
sys.exit(main())
Loading