diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..bbbd5ac --- /dev/null +++ b/.env.example @@ -0,0 +1,40 @@ +# =================================== +# Database Configuration +# =================================== +# For docker-compose +POSTGRES_USER=memu_user +POSTGRES_PASSWORD=memu_pass +POSTGRES_DB=memu_db + +# For application +DATABASE_HOST=localhost +DATABASE_PORT=54320 +DATABASE_USER=memu_user +DATABASE_PASSWORD=memu_pass +DATABASE_NAME=memu_db + +# =================================== +# Temporal Configuration +# =================================== +TEMPORAL_HOST=localhost +TEMPORAL_PORT=17233 +TEMPORAL_NAMESPACE=default + +# =================================== +# LLM Configuration +# =================================== +OPENAI_API_KEY=your_openai_api_key_here +OPENAI_BASE_URL=https://api.openai.com/v1 +DEFAULT_LLM_MODEL=gpt-4o-mini + +# =================================== +# Embedding Configuration +# =================================== +EMBEDDING_API_KEY=your_embedding_api_key_here +EMBEDDING_BASE_URL=https://api.voyageai.com/v1 +EMBEDDING_MODEL=voyage-3.5-lite + +# =================================== +# Storage Configuration +# =================================== +STORAGE_PATH=/var/data/memu-server diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index 3c2b995..a902b48 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -7,7 +7,7 @@ body: attributes: value: | Thanks for taking the time to report a bug! Please fill out the form below. - + - type: textarea id: description attributes: @@ -16,7 +16,7 @@ body: placeholder: Describe the bug... validations: required: true - + - type: textarea id: reproduction attributes: @@ -28,7 +28,7 @@ body: 3. See error validations: required: true - + - type: textarea id: expected attributes: @@ -37,7 +37,7 @@ body: placeholder: What should happen? 
validations: required: true - + - type: textarea id: actual attributes: @@ -46,7 +46,7 @@ body: placeholder: What actually happened? validations: required: true - + - type: textarea id: environment attributes: @@ -58,7 +58,7 @@ body: - memU-server Version: [e.g. 1.0.0] validations: required: false - + - type: textarea id: logs attributes: @@ -67,7 +67,7 @@ body: render: shell validations: required: false - + - type: textarea id: additional attributes: diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml index 14cdfc0..ec47481 100644 --- a/.github/ISSUE_TEMPLATE/feature_request.yml +++ b/.github/ISSUE_TEMPLATE/feature_request.yml @@ -7,7 +7,7 @@ body: attributes: value: | Thanks for suggesting a new feature! Please fill out the form below. - + - type: textarea id: problem attributes: @@ -16,7 +16,7 @@ body: placeholder: I'm always frustrated when... validations: required: true - + - type: textarea id: solution attributes: @@ -25,7 +25,7 @@ body: placeholder: I would like to have... validations: required: true - + - type: textarea id: alternatives attributes: @@ -34,7 +34,7 @@ body: placeholder: Alternative approaches could be... validations: required: false - + - type: textarea id: benefits attributes: @@ -43,7 +43,7 @@ body: placeholder: This feature would help users... validations: required: false - + - type: textarea id: additional attributes: diff --git a/.github/ISSUE_TEMPLATE/improvement_suggestion.yml b/.github/ISSUE_TEMPLATE/improvement_suggestion.yml index dbe2079..bd0875b 100644 --- a/.github/ISSUE_TEMPLATE/improvement_suggestion.yml +++ b/.github/ISSUE_TEMPLATE/improvement_suggestion.yml @@ -7,7 +7,7 @@ body: attributes: value: | Thanks for suggesting an improvement! Please fill out the form below. - + - type: textarea id: current attributes: @@ -16,7 +16,7 @@ body: placeholder: Currently, the system... 
validations: required: true - + - type: textarea id: suggested attributes: @@ -25,7 +25,7 @@ body: placeholder: It would be better if... validations: required: true - + - type: textarea id: rationale attributes: @@ -34,7 +34,7 @@ body: placeholder: This would improve... validations: required: true - + - type: textarea id: impact attributes: @@ -43,7 +43,7 @@ body: placeholder: This would affect... validations: required: false - + - type: textarea id: implementation attributes: @@ -52,7 +52,7 @@ body: placeholder: This could be implemented by... validations: required: false - + - type: textarea id: additional attributes: diff --git a/.github/workflows/code-quality.yml b/.github/workflows/code-quality.yml new file mode 100644 index 0000000..eb6148c --- /dev/null +++ b/.github/workflows/code-quality.yml @@ -0,0 +1,53 @@ +name: Code Quality + +on: + push: + branches: [ main, develop, 'feature/**' ] + pull_request: + branches: [ main, develop ] + +jobs: + quality-check: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.13"] + + steps: + - uses: actions/checkout@v4 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Install uv + run: | + curl -LsSf https://astral.sh/uv/install.sh | sh + echo "$HOME/.local/bin" >> $GITHUB_PATH + + - name: Install dependencies + run: | + uv venv + uv sync + + - name: Check code formatting + run: | + uv run ruff format --check . + + - name: Lint code + run: | + uv run ruff check . 
+ + - name: Run tests + run: | + uv run pytest -v --cov=app --cov-report=xml --cov-report=term + + - name: Upload coverage reports + if: matrix.python-version == '3.13' + uses: codecov/codecov-action@v4 + with: + file: ./coverage.xml + fail_ci_if_error: false + env: + CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} diff --git a/.gitignore b/.gitignore index 1eca64f..9c84b24 100644 --- a/.gitignore +++ b/.gitignore @@ -1,20 +1,8 @@ -/_local/ -/data -docs/source -config/secrets -.secrets/ -node_modules -# From https://raw.githubusercontent.com/github/gitignore/main/Python.gitignore - -# Byte-compiled / optimized / DLL files +# Python __pycache__/ *.py[cod] *$py.class - -# C extensions *.so - -# Distribution / packaging .Python build/ develop-eggs/ @@ -28,121 +16,57 @@ parts/ sdist/ var/ wheels/ -share/python-wheels/ *.egg-info/ .installed.cfg *.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ -.pytest_cache/ -cover/ - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 -db.sqlite3-journal - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -.pybuilder/ -target/ - -# Jupyter Notebook -.ipynb_checkpoints -# IPython -profile_default/ -ipython_config.py - -# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm -__pypackages__/ - -# Celery stuff -celerybeat-schedule -celerybeat.pid - -# SageMath parsed files -*.sage.py +# Virtual Environment +venv/ +env/ +ENV/ +.venv/ -# Environments +# Environment Variables .env .env.dev +.env.prod .env.local -.env.*.local -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ -**/config/*.secret.py -# Spyder project settings -.spyderproject -.spyproject +# IDEs +.vscode/ +.idea/ +*.swp +*.swo +*~ -# Rope project settings -.ropeproject +# Database +*.db +*.sqlite3 +*.sql -# mkdocs documentation -/site +# Logs +*.log +logs/ +server.log +nohup.out -# mypy -.mypy_cache/ -.dmypy.json -dmypy.json +# Storage +storage/* +!storage/.gitkeep +data/ -# Pyre type checker -.pyre/ +# Alembic +alembic/versions/__pycache__/ -# pytype static type analyzer -.pytype/ +# Testing +.pytest_cache/ +.coverage +htmlcov/ +.tox/ -# Cython debug symbols -cython_debug/ +# OS +.DS_Store +Thumbs.db -# Vscode config files -.vscode/ -.claude/ -# PyCharm -# JetBrains specific template is maintained in a separate JetBrains.gitignore that can -# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore -# and can be added to the global gitignore or merged into this file. For a more nuclear -# option (not recommended) you can uncomment the following to ignore the entire idea folder. 
-.idea/ +# Docker +docker-compose.override.yml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..f5b3db7 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,124 @@ +# Pre-commit hooks configuration +# Install: pre-commit install +# Run manually: pre-commit run --all-files + +repos: + # Ruff - Fast Python linter and formatter + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.14.13 + hooks: + # Run the linter + - id: ruff + args: [--fix] + # Run the formatter + - id: ruff-format + + # Built-in hooks + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v6.0.0 + hooks: + - id: check-yaml + name: Check YAML syntax + - id: check-json + name: Check JSON syntax + - id: check-toml + name: Check TOML syntax + - id: end-of-file-fixer + name: Fix end of files + - id: trailing-whitespace + name: Trim trailing whitespace + - id: check-added-large-files + name: Check for large files + args: ['--maxkb=1000'] + - id: check-merge-conflict + name: Check for merge conflicts + - id: detect-private-key + name: Detect private keys + + # Python-specific checks + - repo: https://github.com/pre-commit/pygrep-hooks + rev: v1.10.0 + hooks: + - id: python-check-blanket-noqa + name: Check blanket noqa + - id: python-check-blanket-type-ignore + name: Check blanket type ignore + - id: python-no-eval + name: Check for eval() + - id: python-use-type-annotations + name: Check for type annotations + + # Security checks with bandit + - repo: https://github.com/PyCQA/bandit + rev: 1.9.3 + hooks: + - id: bandit + name: Security check with bandit + args: ['-c', 'pyproject.toml'] + additional_dependencies: ['bandit[toml]'] + + # Additional Python quality checks + - repo: local + hooks: + - id: pylint + name: pylint + entry: pylint + language: system + types: [python] + args: ['--rcfile=pyproject.toml', '--fail-under=8.0'] + require_serial: true + - id: check-test-coverage + name: Check test coverage threshold + entry: python 
.pre-commit-hooks/check_test_coverage.py + language: system + pass_filenames: false + args: ['35'] # Minimum 35% coverage (gradually increase as tests are added) + stages: [pre-push] # Only run before pushing, not on every commit + - id: check-alembic-migrations + name: Check Alembic migrations are not empty + entry: python .pre-commit-hooks/check_alembic_migrations.py + language: system + pass_filenames: false + files: ^alembic/versions/.*\.py$ + - id: check-config-credentials + name: Check config files for hardcoded credentials + entry: python .pre-commit-hooks/check_config_credentials.py + language: system + pass_filenames: false + types_or: [ini, yaml, toml] + - id: check-python-credentials + name: Check Python code for hardcoded credentials + entry: python .pre-commit-hooks/check_python_credentials.py + language: system + pass_filenames: false + types: [python] + - id: check-makefile-commands + name: Check Makefile for deprecated commands + entry: python .pre-commit-hooks/check_makefile_commands.py + language: system + pass_filenames: false + files: ^Makefile$ + - id: check-markdown-formatting + name: Check Markdown formatting issues + entry: python .pre-commit-hooks/check_markdown_formatting.py + language: system + pass_filenames: false + types: [markdown] +# Global settings +default_language_version: + python: python3.13 + +# Files to exclude +exclude: | + (?x)^( + \.venv/| + venv/| + \.git/| + __pycache__/| + \.pytest_cache/| + \.ruff_cache/| + \.mypy_cache/| + build/| + dist/| + .*\.egg-info/ + ) diff --git a/.pre-commit-hooks/check_alembic_migrations.py b/.pre-commit-hooks/check_alembic_migrations.py new file mode 100755 index 0000000..88ebea3 --- /dev/null +++ b/.pre-commit-hooks/check_alembic_migrations.py @@ -0,0 +1,63 @@ +#!/usr/bin/env python3 +"""Check Alembic migration files for empty upgrades/downgrades.""" + +import ast +import sys +from pathlib import Path + + +def check_migration_file(filepath: Path) -> tuple[bool, str]: + """Check if migration 
file has actual content.""" + try: + with open(filepath, encoding="utf-8") as f: + tree = ast.parse(f.read()) + + for node in ast.walk(tree): + if isinstance(node, ast.FunctionDef): + if node.name in ("upgrade", "downgrade"): + # Check if function body is just 'pass' or comments + body = [ + n + for n in node.body + if not isinstance(n, (ast.Pass, ast.Expr)) + or (isinstance(n, ast.Expr) and not isinstance(n.value, ast.Constant)) + ] + if not body: + return ( + False, + f"{filepath}: {node.name}() is empty (only pass statements)", + ) + return True, "" + except Exception as e: + return False, f"{filepath}: Error parsing file: {e}" + + +def main(): + """Main function.""" + alembic_versions = Path("alembic/versions") + if not alembic_versions.exists(): + return 0 + + migration_files = [f for f in alembic_versions.glob("*.py") if f.name != "__init__.py"] + + errors = [] + for migration_file in migration_files: + is_valid, error = check_migration_file(migration_file) + if not is_valid: + errors.append(error) + + if errors: + print("โŒ Empty Alembic migrations detected:") + for error in errors: + print(f" - {error}") + print( + "\n๐Ÿ’ก Hint: Make sure your models are imported in alembic/env.py " + "before running 'alembic revision --autogenerate'" + ) + return 1 + + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/.pre-commit-hooks/check_config_credentials.py b/.pre-commit-hooks/check_config_credentials.py new file mode 100755 index 0000000..8f3b681 --- /dev/null +++ b/.pre-commit-hooks/check_config_credentials.py @@ -0,0 +1,89 @@ +#!/usr/bin/env python3 +"""Check configuration files for hardcoded credentials.""" + +import re +import sys +from pathlib import Path + +# Patterns that indicate hardcoded credentials +CREDENTIAL_PATTERNS = [ + # Database URLs with actual credentials + (r"postgresql://[^$@{}][^@]*:[^$@{}][^@]*@", "PostgreSQL URL with hardcoded credentials"), + (r"mysql://[^$@{}][^@]*:[^$@{}][^@]*@", "MySQL URL with hardcoded 
credentials"), + (r"mongodb://[^$@{}][^@]*:[^$@{}][^@]*@", "MongoDB URL with hardcoded credentials"), + # Common password/key patterns with quotes (INI, TOML) + (r'^[^#]*password\s*=\s*["\'](?!\$\{)[^"\'].*["\']', "Hardcoded password"), + (r'^[^#]*api[_-]?key\s*=\s*["\'](?!\$\{)[^"\'].*["\']', "Hardcoded API key"), + (r'^[^#]*secret\s*=\s*["\'](?!\$\{)[^"\'].*["\']', "Hardcoded secret"), + # YAML-style patterns without quotes (docker-compose.yml, k8s configs) + (r"^\s*POSTGRES_PASSWORD\s*:\s*(?!\$\{)[a-zA-Z0-9_]+\s*$", "Hardcoded PostgreSQL password in YAML"), + (r"^\s*POSTGRES_USER\s*:\s*(?!\$\{)[a-zA-Z0-9_]+\s*$", "Hardcoded PostgreSQL user in YAML"), + (r"^\s*MYSQL_PASSWORD\s*:\s*(?!\$\{)[a-zA-Z0-9_]+\s*$", "Hardcoded MySQL password in YAML"), + (r"^\s*MYSQL_USER\s*:\s*(?!\$\{)[a-zA-Z0-9_]+\s*$", "Hardcoded MySQL user in YAML"), + (r"^\s*password\s*:\s*(?!\$\{)[\w@#$%^&*()_+\-=]+\s*$", "Hardcoded password in YAML"), +] + +# File patterns to check +CONFIG_FILES = [ + "*.ini", + "*.conf", + "*.config", + "*.cfg", + "*.yaml", + "*.yml", + "*.toml", + ".env.example", +] + + +def check_file_for_credentials(filepath: Path) -> list[tuple[int, str]]: + """Check a file for hardcoded credentials.""" + issues = [] + try: + with open(filepath, encoding="utf-8") as f: + for line_num, line in enumerate(f, 1): + # Skip comment-only lines + if line.strip().startswith("#"): + continue + + for pattern, description in CREDENTIAL_PATTERNS: + if re.search(pattern, line, re.IGNORECASE): + issues.append((line_num, f"{description}: {line.strip()[:80]}...")) + except Exception as e: + print(f"Warning: Could not check {filepath}: {e}") + + return issues + + +def main(): + """Main function.""" + root = Path(".") + errors = [] + + for pattern in CONFIG_FILES: + for filepath in root.rglob(pattern): + # Skip .venv and other ignored directories + if any(part in filepath.parts for part in [".venv", "venv", ".git", "__pycache__", "node_modules"]): + continue + + file_issues = 
check_file_for_credentials(filepath) + if file_issues: + errors.append((filepath, file_issues)) + + if errors: + print("โŒ Hardcoded credentials detected in configuration files:") + for filepath, issues in errors: + print(f"\n {filepath}:") + for line_num, description in issues: + print(f" Line {line_num}: {description}") + print( + "\n๐Ÿ’ก Hint: Use environment variable placeholders like ${VAR_NAME} " + "or ${DATABASE_USER} instead of hardcoded values." + ) + return 1 + + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/.pre-commit-hooks/check_makefile_commands.py b/.pre-commit-hooks/check_makefile_commands.py new file mode 100755 index 0000000..d13c44c --- /dev/null +++ b/.pre-commit-hooks/check_makefile_commands.py @@ -0,0 +1,63 @@ +#!/usr/bin/env python3 +"""Check Makefile commands for common issues.""" + +import re +import sys +from pathlib import Path + + +def check_makefile_commands(makefile_path: str = "Makefile") -> int: + """ + Check Makefile for deprecated or incorrect command patterns. 
+ + Args: + makefile_path: Path to the Makefile + + Returns: + 0 if all checks pass, 1 if issues found + """ + makefile = Path(makefile_path) + if not makefile.exists(): + print(f"โš ๏ธ Makefile not found at {makefile_path}") + return 0 + + content = makefile.read_text() + issues = [] + + # Check for deprecated uv pip install patterns + deprecated_patterns = [ + ( + r'uv\s+pip\s+install\s+-e\s+"?\.\[.*?\]"?', + "uv pip install -e '.[extras]' is deprecated with PEP 735 dependency-groups", + "Use 'uv sync' or 'uv sync --group ' instead", + ), + ] + + for pattern, message, suggestion in deprecated_patterns: + matches = re.finditer(pattern, content, re.MULTILINE) + for match in matches: + # Find line number + line_num = content[: match.start()].count("\n") + 1 + issues.append( + { + "line": line_num, + "match": match.group(), + "message": message, + "suggestion": suggestion, + } + ) + + if issues: + print("โŒ Found issues in Makefile:") + for issue in issues: + print(f"\n Line {issue['line']}: {issue['match']}") + print(f" โš ๏ธ {issue['message']}") + print(f" ๐Ÿ’ก {issue['suggestion']}") + return 1 + + print("โœ… Makefile commands look good") + return 0 + + +if __name__ == "__main__": + sys.exit(check_makefile_commands()) diff --git a/.pre-commit-hooks/check_markdown_formatting.py b/.pre-commit-hooks/check_markdown_formatting.py new file mode 100755 index 0000000..67de564 --- /dev/null +++ b/.pre-commit-hooks/check_markdown_formatting.py @@ -0,0 +1,86 @@ +#!/usr/bin/env python3 +"""Check Markdown files for formatting issues.""" + +import re +import sys +from pathlib import Path + + +def check_markdown_file(filepath: Path) -> list[tuple[int, str]]: + """Check a Markdown file for common formatting issues.""" + issues = [] + try: + content = filepath.read_text(encoding="utf-8") + lines = content.split("\n") + + for line_num, line in enumerate(lines, 1): + # Check for escaped backticks in code fences + if re.match(r"^\\`\\`\\`", line): + issues.append( + ( + line_num, 
+ f"Escaped code fence (\\`\\`\\`) should be unescaped (```): {line[:50]}", + ) + ) + + # Check for inconsistent code fence markers + if re.match(r"^`{3,}[^`]", line) or re.match(r"^`{3,}$", line): + # This is a proper code fence, check if previous/next lines have escaped ones + pass + + # Check for duplicate content blocks (same 5+ consecutive lines) + seen_blocks = {} + block_size = 5 + for i in range(len(lines) - block_size): + block = tuple(lines[i : i + block_size]) + # Skip empty blocks + if all(not line.strip() for line in block): + continue + + block_str = "\n".join(block) + if block in seen_blocks and block_str.strip(): + issues.append( + ( + i + 1, + f"Duplicate content block found (also at line {seen_blocks[block]})", + ) + ) + else: + seen_blocks[block] = i + 1 + + except Exception as e: + print(f"Warning: Could not check {filepath}: {e}") + + return issues + + +def main(): + """Main function.""" + root = Path(".") + errors = [] + + for filepath in root.rglob("*.md"): + # Skip .venv and other ignored directories + if any(part in filepath.parts for part in [".venv", "venv", ".git", "__pycache__", "node_modules"]): + continue + + file_issues = check_markdown_file(filepath) + if file_issues: + errors.append((filepath, file_issues)) + + if errors: + print("โŒ Markdown formatting issues detected:") + for filepath, issues in errors: + print(f"\n {filepath}:") + for line_num, description in issues: + print(f" Line {line_num}: {description}") + print("\n๐Ÿ’ก Fix escaped code fences by replacing \\`\\`\\` with ```") + print("๐Ÿ’ก Remove duplicate content blocks to keep documentation concise") + return 1 + + print("โœ… Markdown files look good") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/.pre-commit-hooks/check_python_credentials.py b/.pre-commit-hooks/check_python_credentials.py new file mode 100755 index 0000000..639700f --- /dev/null +++ b/.pre-commit-hooks/check_python_credentials.py @@ -0,0 +1,96 @@ +#!/usr/bin/env python3 
+"""Check Python source code for hardcoded credentials.""" + +import re +import sys +from pathlib import Path + + +def check_python_file_for_credentials(filepath: Path) -> list[tuple[int, str]]: + """Check a Python file for hardcoded credentials.""" + issues = [] + try: + with open(filepath, encoding="utf-8") as f: + for line_num, line in enumerate(f, 1): + # Skip comments + if line.strip().startswith("#"): + continue + + # Check for database URLs with embedded credentials + # Match patterns like: postgresql://user:pass@host + db_url_pattern = r'["\'](?:postgresql|mysql|mongodb)(?:\+\w+)?://[^$:{}]+:[^$@{}]+@[^"\']*["\']' + if re.search(db_url_pattern, line): + # Exclude patterns that use environment variables or config + if not re.search(r"os\.getenv|os\.environ|\$\{|\{[a-zA-Z_]", line): + issues.append( + ( + line_num, + f"Database URL with hardcoded credentials: {line.strip()[:100]}...", + ) + ) + + # Check for explicit password assignments + # password = "something" + password_pattern = r'password\s*=\s*["\'][^"\']{3,}["\']' # nosec B105 + if re.search(password_pattern, line, re.IGNORECASE): + # Skip if it's using environment variables + if not re.search(r"os\.getenv|os\.environ|getenv|environ\[", line): + issues.append( + ( + line_num, + f"Hardcoded password assignment: {line.strip()[:80]}...", + ) + ) + + # Check for API keys in strings + api_key_pattern = r'["\'](?:sk-|pk_live_|pk_test_)[a-zA-Z0-9]{20,}["\']' + if re.search(api_key_pattern, line): + issues.append( + ( + line_num, + f"Hardcoded API key detected: {line.strip()[:80]}...", + ) + ) + + except Exception as e: + print(f"Warning: Could not check {filepath}: {e}") + + return issues + + +def main(): + """Main function.""" + root = Path(".") + errors = [] + + # Check all Python files in app/ directory + for filepath in root.rglob("*.py"): + # Skip .venv and test files + if any( + part in filepath.parts + for part in [".venv", "venv", ".git", "__pycache__", "node_modules", "tests", "test_"] + ): + 
continue + + file_issues = check_python_file_for_credentials(filepath) + if file_issues: + errors.append((filepath, file_issues)) + + if errors: + print("โŒ Hardcoded credentials detected in Python source code:") + for filepath, issues in errors: + print(f"\n {filepath}:") + for line_num, description in issues: + print(f" Line {line_num}: {description}") + print('\n๐Ÿ’ก Use os.getenv() with fallback values for development: os.getenv("DATABASE_URL", "default_value")') + print( + "๐Ÿ’ก For production, ensure environment variables are set without hardcoded defaults " + "or add validation at startup" + ) + return 1 + + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/.pre-commit-hooks/check_test_coverage.py b/.pre-commit-hooks/check_test_coverage.py new file mode 100755 index 0000000..d70144c --- /dev/null +++ b/.pre-commit-hooks/check_test_coverage.py @@ -0,0 +1,60 @@ +#!/usr/bin/env python3 +"""Check that test coverage meets minimum threshold.""" + +import subprocess # nosec B404 +import sys + + +def check_coverage(min_coverage: int = 80) -> int: + """ + Run pytest with coverage and check if it meets minimum threshold. 
+ + Args: + min_coverage: Minimum required coverage percentage (default 80%) + + Returns: + 0 if coverage meets threshold, 1 otherwise + """ + try: + # Run pytest with coverage + result = subprocess.run( # nosec B603 B607 + [ + "pytest", + "--cov=app", + "--cov-report=term-missing", + f"--cov-fail-under={min_coverage}", + "--quiet", + "tests/", + ], + capture_output=True, + text=True, + check=False, + ) + + # Print output + if result.stdout: + print(result.stdout) + if result.stderr: + print(result.stderr, file=sys.stderr) + + # Check if coverage threshold was met + if result.returncode != 0: + print(f"\nโŒ Test coverage is below {min_coverage}%") + print("๐Ÿ’ก Run 'make test-cov' to see detailed coverage report") + print("๐Ÿ’ก Add tests to increase coverage or adjust threshold in .pre-commit-config.yaml") + return 1 + + print(f"โœ… Test coverage meets minimum threshold of {min_coverage}%") + return 0 + + except FileNotFoundError: + print("โŒ pytest not found. Install dev dependencies: make dev", file=sys.stderr) + return 1 + except Exception as e: + print(f"โŒ Error checking coverage: {e}", file=sys.stderr) + return 1 + + +if __name__ == "__main__": + min_threshold = int(sys.argv[1]) if len(sys.argv) > 1 else 80 + sys.exit(check_coverage(min_threshold)) diff --git a/.python-version b/.python-version index 6324d40..24ee5b1 100644 --- a/.python-version +++ b/.python-version @@ -1 +1 @@ -3.14 +3.13 diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..2f54f3a --- /dev/null +++ b/Makefile @@ -0,0 +1,110 @@ +.PHONY: help install dev test test-cov check format lint clean run docker-up docker-down migrate pre-commit-install pre-commit-run migrate-check + +# Default target +help: + @echo "Available commands:" + @echo " make install - Install dependencies" + @echo " make dev - Install dev dependencies" + @echo " make test - Run tests" + @echo " make test-cov - Run tests with coverage report" + @echo " make check - Run all quality checks (format, lint, 
test)" + @echo " make format - Format code with ruff" + @echo " make format-check - Check code formatting without changes" + @echo " make lint - Lint code with ruff" + @echo " make clean - Clean cache and build files" + @echo " make run - Run the server" + @echo " make docker-up - Start Docker services" + @echo " make docker-down - Stop Docker services" + @echo " make migrate - Run database migrations" + @echo " make migrate-check - Check migrations for empty content" + @echo " make pre-commit-install - Install pre-commit hooks" + @echo " make pre-commit-run - Run pre-commit on all files" + +# Install production dependencies +install: + uv sync --no-dev + +# Install development dependencies +dev: + uv sync + +# Run tests +test: + @pytest -v || echo "โš ๏ธ No tests found or tests failed" + +# Run tests with coverage +test-cov: + @echo "๐Ÿ” Running tests with coverage..." + @pytest --cov=app --cov-report=term-missing --cov-report=html --cov-fail-under=80 tests/ + +# Run all quality checks +check: format-check lint test + @echo "โœ… All quality checks passed!" + +# Format code +format: + ruff format . + +# Check formatting without making changes +format-check: + @echo "๐Ÿ” Checking code formatting..." + ruff format --check . + +# Lint code +lint: + @echo "๐Ÿ” Linting code..." + ruff check . + @echo "๐Ÿ” Running pylint..." + pylint app/ --fail-under=8.0 || true + +# Clean cache and build files +clean: + find . -type d -name "__pycache__" -exec rm -rf {} + 2>/dev/null || true + find . -type d -name ".pytest_cache" -exec rm -rf {} + 2>/dev/null || true + find . -type d -name ".ruff_cache" -exec rm -rf {} + 2>/dev/null || true + find . -type d -name "*.egg-info" -exec rm -rf {} + 2>/dev/null || true + find . 
-type f -name "*.pyc" -delete + rm -rf dist/ build/ .coverage htmlcov/ + +# Run the development server +run: + uvicorn app.main:app --reload --host 0.0.0.0 --port 8000 + +# Start Docker services +docker-up: + docker compose up -d + +# Stop Docker services +docker-down: + docker compose down + +# Run database migrations +migrate: + alembic upgrade head + +# Run migrations rollback +migrate-down: + alembic downgrade -1 + +# Create a new migration +migrate-create: + @read -p "Enter migration description: " desc; \ + alembic revision --autogenerate -m "$$desc" + @echo "โš ๏ธ Don't forget to review the generated migration file!" + @python .pre-commit-hooks/check_alembic_migrations.py || true + +# Check migrations for issues +migrate-check: + @python .pre-commit-hooks/check_alembic_migrations.py + +# Install pre-commit hooks +pre-commit-install: + @echo "๐Ÿ“ฆ Installing pre-commit..." + uv pip install pre-commit + pre-commit install + @echo "โœ… Pre-commit hooks installed! They will run automatically on git commit." + +# Run pre-commit on all files +pre-commit-run: + @echo "๐Ÿ” Running pre-commit checks on all files..." + pre-commit run --all-files diff --git a/README.md b/README.md index c8d7d2b..bcf27cc 100644 --- a/README.md +++ b/README.md @@ -1,132 +1,355 @@ -# memU-server: Local Backend Service for AI Memory System - -memU-server is the backend management service for MemU, responsible for providing API endpoints, data storage, and management capabilities, as well as deep integration with the core memU framework. It powers the frontend memU-ui with reliable data support, ensuring efficient reading, writing, and maintenance of Agent memories. memU-server can be deployed locally or in private environments and supports quick startup and configuration via Docker, enabling developers to manage the AI memory system in a secure environment. 
- -- Core Algorithm ๐Ÿ‘‰ memU: https://github.com/NevaMind-AI/memU -- One call = response + memory ๐Ÿ‘‰ memU Response API: https://memu.pro/docs#responseapi -- Try it instantly ๐Ÿ‘‰ https://app.memu.so/quick-start - ---- - -## โญ Star Us on GitHub - -Star memU-server to get notified about new releases and join our growing community of AI developers building intelligent agents with persistent memory capabilities. -๐Ÿ’ฌ Join our Discord community: https://discord.gg/memu - ---- - -## ๐Ÿš€ Get Started - -### Run from source -1. Ensure you have Python 3.14+ and [uv](https://docs.astral.sh/uv/) installed. -2. Clone the repository and enter it: - ```bash - git clone https://github.com/NevaMind-AI/memU-server.git - cd memU-server - ``` -3. Set your OpenAI API key in the environment: - ```bash - export OPENAI_API_KEY=your_api_key_here - ``` -4. Install dependencies and start the FastAPI dev server: - ```bash - uv sync - uv run fastapi dev - ``` - The server runs on `http://127.0.0.1:8000`. - -### Run with Docker -1. Export your OpenAI API key so Docker can read it: - ```bash - export OPENAI_API_KEY=your_api_key_here - ``` -2. Pull the latest image: - ```bash - docker pull nevamindai/memu-server:latest - ``` -3. Start the container (optionally mount a host directory to persist `./data`): - ```bash - docker run --rm -p 8000:8000 \ - -e OPENAI_API_KEY=$OPENAI_API_KEY \ - nevamindai/memu-server:latest - ``` - Access the API at `http://127.0.0.1:8000`. - -### API Endpoints -- `POST /memorize`: persist a conversation-style payload for later retrieval. 
Example body shape: - ```json - { - "content": [ - {"role": "user", "content": {"text": "..."}, "created_at": "YYYY-MM-DD HH:MM:SS"}, - {"role": "assistant", "content": {"text": "..."}, "created_at": "YYYY-MM-DD HH:MM:SS"} - ] - } - ``` -- `POST /retrieve`: query stored memories with a text prompt: - ```json - {"query": "your question about the conversation"} - ``` -- To smoke-test locally, set `MEMU_API_URL` (defaults to `http://127.0.0.1:12345`), POST a conversation to `/memorize`, then call `/retrieve` with a text query. +# memU-server: Local Backend Service for AI Memory System + +memU-server is the backend management service for MemU, responsible for providing API endpoints, data storage, and management capabilities, as well as deep integration with the core memU framework. It powers the frontend memU-ui with reliable data support, ensuring efficient reading, writing, and maintenance of Agent memories. memU-server can be deployed locally or in private environments and supports quick startup and configuration via Docker, enabling developers to manage the AI memory system in a secure environment. 
+ +- Core Algorithm ๐Ÿ‘‰ memU: https://github.com/NevaMind-AI/memU +- One call = response + memory ๐Ÿ‘‰ memU Response API: https://memu.pro/docs#responseapi +- Try it instantly ๐Ÿ‘‰ https://app.memu.so/quick-start --- +## โœจ Features + +- ๐Ÿง  **Memorize**: Store and process conversational memories asynchronously +- ๐Ÿ” **Retrieve**: Query and retrieve relevant memories with semantic search +- ๐Ÿš€ **Async First**: Built with FastAPI and async/await for high performance +- ๐Ÿ—„๏ธ **PostgreSQL + pgvector**: Vector similarity search with efficient indexing +- โšก **Temporal Workflows**: Reliable async task orchestration +- ๐Ÿ”ง **Type Safe**: Full type hints with Pydantic and SQLModel +- ๐Ÿงช **Well Tested**: Comprehensive test coverage with pytest + +## ๐Ÿ—๏ธ Architecture + +- **Web Framework**: FastAPI with async/await +- **Database**: PostgreSQL 16 with pgvector extension +- **ORM**: SQLModel (SQLAlchemy + Pydantic) +- **Workflow Engine**: Temporal for async task processing +- **Memory Engine**: memu-py for intelligent memory management +- **Migrations**: Alembic for database schema management + +## ๐Ÿš€ Quick Start + +### Prerequisites + +- Python 3.13+ +- Docker & Docker Compose +- [uv](https://github.com/astral-sh/uv) (recommended) or pip + +### Installation + +1. **Clone the repository** +```bash +git clone https://github.com/NevaMind-AI/memU-server.git +cd memU-server +``` + +2. **Set up Python environment** +```bash +# Create virtual environment with uv (recommended) +uv venv +source .venv/bin/activate + +# Or with standard venv +python -m venv .venv +source .venv/bin/activate # On Windows: .venv\Scripts\activate +``` + +3. **Install dependencies** +```bash +# With uv (fast) +uv pip install -e . + +# Or with pip +pip install -e . +``` + +4. **Configure environment** +```bash +cp .env.example .env +# Edit .env with your configuration +``` + +5. 
**Start infrastructure services** +```bash +docker compose up -d +``` + +This will start: +- PostgreSQL with pgvector on port 54320 +- Temporal server on ports 17233 (gRPC) and 18233 (Web UI) + +6. **Run database migrations** +```bash +alembic upgrade head +``` + +7. **Start the server** +```bash +# Development mode with auto-reload +uvicorn app.main:app --reload --host 0.0.0.0 --port 8000 + +# Or in background +nohup uvicorn app.main:app --host 0.0.0.0 --port 8000 > server.log 2>&1 & +``` + +The API will be available at: +- API: http://localhost:8000 +- API Docs (Swagger): http://localhost:8000/docs +- Alternative Docs (ReDoc): http://localhost:8000/redoc + +## ๐Ÿ“ Configuration + +Key environment variables in `.env`: + +```bash +# Database +DATABASE_HOST=localhost +DATABASE_PORT=54320 +DATABASE_USER=memu_user +DATABASE_PASSWORD=memu_pass +DATABASE_NAME=memu_db + +# Temporal +TEMPORAL_HOST=localhost +TEMPORAL_PORT=17233 +TEMPORAL_NAMESPACE=default + +# LLM +OPENAI_API_KEY=your_api_key_here +OPENAI_BASE_URL=https://api.openai.com/v1 +DEFAULT_LLM_MODEL=gpt-4o-mini + +# Embedding +EMBEDDING_API_KEY=your_embedding_key_here +EMBEDDING_BASE_URL=https://api.voyageai.com/v1 +EMBEDDING_MODEL=voyage-3.5-lite + +# Storage +STORAGE_PATH=/var/data/memu-server +``` + +## ๐Ÿงช Testing + +Run tests with pytest or make: + +```bash +# Run all tests +pytest + +# Or use make command +make test + +# Run with coverage +pytest --cov=app --cov-report=html + +# Run specific test file +pytest tests/test_health.py -v +``` + +## ๐Ÿ”ง Code Quality + +### Automated Quality Checks + +This project uses multiple layers of quality assurance: + +**1. Pre-commit Hooks** (Recommended) + +Install pre-commit hooks to automatically check code before every commit: + +```bash +# Install pre-commit hooks +make pre-commit-install + +# Now hooks will run automatically on git commit +# To manually run on all files: +make pre-commit-run +``` + +**2. 
Manual Quality Checks** + +```bash +# Show all available commands +make help + +# Run all quality checks (format, lint, test) +make check + +# Format code with ruff +make format + +# Check formatting without changes +make format-check + +# Lint code +make lint + +# Run tests +make test + +# Clean cache files +make clean +``` + +**3. CI/CD Pipeline** + +GitHub Actions automatically runs quality checks on: +- Every push to `main`, `develop`, or `feature/**` branches +- Every pull request + +### Best Practices + +**Before committing code:** +```bash +# Option 1: Let pre-commit hooks handle it (recommended) +git add . +git commit -m "your message" # Hooks run automatically + +# Option 2: Run checks manually first +make check +git add . +git commit -m "your message" +``` + +**Pre-commit hooks will:** +- โœ… Format code with ruff +- โœ… Fix common issues automatically +- โœ… Check YAML, JSON, TOML syntax +- โœ… Detect private keys and large files +- โœ… Trim whitespace and fix line endings +- โŒ Block commit if critical issues found + +**What gets checked:** +- Code formatting (ruff format) +- Linting rules (ruff check) +- File syntax (YAML, JSON, TOML) +- Security issues (private keys) +- File size limits +- Merge conflicts + +**Before committing code, always run:** +```bash +make check +``` + +This will ensure your code is properly formatted, linted, and all tests pass. + +## ๐Ÿ—ƒ๏ธ Database Models + +### Memory +Stores individual memory entries with vector embeddings for semantic search. + +### MemoryCategory +Organizes memories into categories with metadata. + +### MemorizeTask +Tracks async memory processing tasks with status and results. + +## ๐Ÿ”„ API Endpoints + +### Memorize +```bash +POST /memorize +``` +Store new memory asynchronously. + +### Retrieve +```bash +POST /retrieve +``` +Query and retrieve relevant memories. + +### Health Check +```bash +GET / +``` +Returns service status. 
+ +## ๐Ÿ› ๏ธ Development + +### Project Structure +``` +memU-server/ +โ”œโ”€โ”€ app/ +โ”‚ โ”œโ”€โ”€ api/ # API routes +โ”‚ โ”œโ”€โ”€ models/ # Database models +โ”‚ โ”œโ”€โ”€ services/ # Business logic +โ”‚ โ”œโ”€โ”€ workers/ # Temporal workers +โ”‚ โ”œโ”€โ”€ utils/ # Utilities +โ”‚ โ”œโ”€โ”€ database.py # Database configuration +โ”‚ โ””โ”€โ”€ main.py # FastAPI application +โ”œโ”€โ”€ config/ +โ”‚ โ””โ”€โ”€ settings.py # Configuration management +โ”œโ”€โ”€ tests/ # Test suite +โ”œโ”€โ”€ alembic/ # Database migrations +โ”œโ”€โ”€ docker-compose.yml +โ”œโ”€โ”€ pyproject.toml +โ””โ”€โ”€ README.md +``` + +### Code Quality + +```bash +# Format code with ruff +ruff format . + +# Lint code +ruff check . + +# Type checking (if mypy is installed) +mypy app/ +``` + +### Database Migrations + +```bash +# Create a new migration +alembic revision --autogenerate -m "description" + +# Apply migrations +alembic upgrade head + +# Rollback one migration +alembic downgrade -1 + +# View migration history +alembic history +``` + +## ๐Ÿณ Docker + +Build and run with Docker: + +```bash +# Build image +docker build -f dockerfiles/Dockerfile -t memu-server . + +# Run container +docker run -d \ + --name memu-server \ + -p 8000:8000 \ + --env-file .env \ + memu-server +``` + +## ๐Ÿ“Š Monitoring + +- **Temporal Web UI**: http://localhost:18233 +- **Database**: Connect to PostgreSQL on port 54320 + +## ๐Ÿค Contributing + +1. Fork the repository +2. Create a feature branch (\`git checkout -b feature/amazing-feature\`) +3. Commit your changes (\`git commit -m 'feat: add amazing feature'\`) +4. Push to the branch (\`git push origin feature/amazing-feature\`) +5. Open a Pull Request + +## ๐Ÿ“„ License + +See [LICENSE](LICENSE) file for details. 
+ +## ๐Ÿ™ Acknowledgments -## ๐Ÿ”‘ Key Features - -### Quick Deployment -- Docker image provided -- Launch backend service and database with a single command -- Provides API endpoints compatible with memU-ui, ensuring stable and reliable data services - -### Comprehensive Memory Management -(Some features planned for future releases) -- Memory Data Management - - Support creating, reading, and deleting Memory Submissions - - Memorize results support create, read, update, and delete (CRUD) operations - - Retrieve records support querying and tracking - - Tracks LLM token usage for transparent and controllable costs -- User and Permission Management - - User login and registration system - - Role-based access control: Developer / Admin / Regular User - - Backend manages access scope and permissions for secure operations - ---- - -## ๐Ÿงฉ Why MemU? - -Most memory systems in current LLM pipelines rely heavily on explicit modeling, requiring manual definition and annotation of memory categories. This limits AIโ€™s ability to truly understand memory and makes it difficult to support diverse usage scenarios. - -MemU offers a flexible and robust alternative, inspired by hierarchical storage architecture in computer systems. It progressively transforms heterogeneous input data into queryable and interpretable textual memory. - -Its core architecture consists of three layers: **Resource Layer โ†’ Memory Item Layer โ†’ MemoryCategory Layer**. 
- -Three-Layer Architecture Diagram - -- Resource Layer: Multimodal raw data warehouse -- Memory Item Layer: Discrete extracted memory units -- MemoryCategory Layer: Aggregated textual memory units - -### Key Features: -- Full Traceability: Track from raw data โ†’ items โ†’ documents and back -- Memory Lifecycle: Memorization โ†’ Retrieval โ†’ Self-evolution -- Two Retrieval Methods: - - RAG-based: Fast embedding vector search - - LLM-based: Direct file reading with deep semantic understanding -- Self-Evolving: Adapts memory structure based on usage patterns - -process - ---- - -## ๐Ÿ“„ License - -By contributing to memU-server, you agree that your contributions will be licensed under the **AGPL-3.0 License**. - ---- - -## ๐ŸŒ Community - -For more information please contact info@nevamind.ai - -- GitHub Issues: Report bugs, request features, and track development. [Submit an issue](https://github.com/NevaMind-AI/memU-server/issues) -- Discord: Get real-time support, chat with the community, and stay updated. [Join us](https://discord.com/invite/hQZntfGsbJ) -- X (Twitter): Follow for updates, AI insights, and key announcements. [Follow us](https://x.com/memU_ai) +- [memu-py](https://github.com/NevaMind-AI/memU) - Core memory management engine +- [FastAPI](https://fastapi.tiangolo.com/) - Modern web framework +- [Temporal](https://temporal.io/) - Workflow orchestration +- [pgvector](https://github.com/pgvector/pgvector) - Vector similarity search diff --git a/alembic.ini b/alembic.ini new file mode 100644 index 0000000..efbbe36 --- /dev/null +++ b/alembic.ini @@ -0,0 +1,113 @@ +# A generic, single database configuration. 
+ +[alembic] +# path to migration scripts +script_location = alembic + +# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s +# Uncomment the line below if you want the files to be prepended with date and time +# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s + +# sys.path path, will be prepended to sys.path if present. +# defaults to the current working directory. +prepend_sys_path = . + +# timezone to use when rendering the date within the migration file +# as well as the filename. +# If specified, requires the python>=3.9 or backports.zoneinfo library. +# Any required deps can installed by adding `alembic[tz]` to the pip requirements +# string value is passed to ZoneInfo() +# leave blank for localtime +# timezone = + +# max length of characters to apply to the "slug" field +# truncate_slug_length = 40 + +# set to 'true' to run the environment during +# the 'revision' command, regardless of autogenerate +# revision_environment = false + +# set to 'true' to allow .pyc and .pyo files without +# a source .py file to be detected as revisions in the +# versions/ directory +# sourceless = false + +# version location specification; This defaults +# to alembic/versions. When using multiple version +# directories, initial revisions must be specified with --version-path. +# The path separator used here should be the separator specified by "version_path_separator" below. +# version_locations = %(here)s/bar:%(here)s/bat:alembic/versions + +# version path separator; As mentioned above, this is the character used to split +# version_locations. The default within new alembic.ini files is "os", which uses os.pathsep. +# If this key is omitted entirely, it falls back to the legacy behavior of splitting on spaces and/or commas. 
+# Valid values for version_path_separator are: +# +# version_path_separator = : +# version_path_separator = ; +# version_path_separator = space +version_path_separator = os # Use os.pathsep. Default configuration used for new projects. + +# set to 'true' to search source files recursively +# in each "version_locations" directory +# new in Alembic version 1.10 +# recursive_version_locations = false + +# the output encoding used when revision files +# are written from script.py.mako +# output_encoding = utf-8 + +# NOTE(review): Alembic/configparser do NOT expand ${VAR} placeholders - env.py must substitute these (e.g. os.path.expandvars or config.set_main_option) or `alembic upgrade head` will fail; verify before merging: +sqlalchemy.url = postgresql+psycopg://${DATABASE_USER}:${DATABASE_PASSWORD}@${DATABASE_HOST}:${DATABASE_PORT}/${DATABASE_NAME} + +[post_write_hooks] +# post_write_hooks defines scripts or Python functions that are run +# on newly generated revision scripts. See the documentation for further +# detail and examples + +# format using "black" - use the console_scripts runner, against the "black" entrypoint +# hooks = black +# black.type = console_scripts +# black.entrypoint = black +# black.options = -l 79 REVISION_SCRIPT_FILENAME + +# lint with attempts to fix using "ruff" - use the exec runner, execute a binary +# hooks = ruff +# ruff.type = exec +# ruff.executable = %(here)s/.venv/bin/ruff +# ruff.options = --fix REVISION_SCRIPT_FILENAME + +# Logging configuration +[loggers] +keys = root,sqlalchemy,alembic + +[handlers] +keys = console + +[formatters] +keys = generic + +[logger_root] +level = WARN +handlers = console +qualname = + +[logger_sqlalchemy] +level = WARN +handlers = +qualname = sqlalchemy.engine + +[logger_alembic] +level = INFO +handlers = +qualname = alembic + +[handler_console] +class = StreamHandler +args = (sys.stderr,) +level = NOTSET +formatter = generic + +[formatter_generic] +format = %(levelname)-5.5s [%(name)s] %(message)s +datefmt = %H:%M:%S diff --git a/alembic/env.py b/alembic/env.py new file mode 100644 index 0000000..5e063de --- /dev/null +++ 
b/alembic/env.py @@ -0,0 +1,79 @@ +"""Alembic migration environment configuration.""" + +# pylint: disable=no-member +from logging.config import fileConfig + +from sqlalchemy import engine_from_config, pool + +from alembic import context + +# Import app models early for autogenerate support +from app.database import Base + +# this is the Alembic Config object, which provides +# access to the values within the .ini file in use. +config = context.config + +# Interpret the config file for Python logging. +# This line sets up loggers basically. +if config.config_file_name is not None: + fileConfig(config.config_file_name) + +# add your model's MetaData object here +# for 'autogenerate' support +target_metadata = Base.metadata + +# other values from the config, defined by the needs of env.py, +# can be acquired: +# my_important_option = config.get_main_option("my_important_option") +# ... etc. + + +def run_migrations_offline() -> None: + """Run migrations in 'offline' mode. + + This configures the context with just a URL + and not an Engine, though an Engine is acceptable + here as well. By skipping the Engine creation + we don't even need a DBAPI to be available. + + Calls to context.execute() here emit the given string to the + script output. + + """ + url = config.get_main_option("sqlalchemy.url") + context.configure( + url=url, + target_metadata=target_metadata, + literal_binds=True, + dialect_opts={"paramstyle": "named"}, + ) + + with context.begin_transaction(): + context.run_migrations() + + +def run_migrations_online() -> None: + """Run migrations in 'online' mode. + + In this scenario we need to create an Engine + and associate a connection with the context. 
+ + """ + connectable = engine_from_config( + config.get_section(config.config_ini_section, {}), + prefix="sqlalchemy.", + poolclass=pool.NullPool, + ) + + with connectable.connect() as connection: + context.configure(connection=connection, target_metadata=target_metadata) + + with context.begin_transaction(): + context.run_migrations() + + +if context.is_offline_mode(): + run_migrations_offline() +else: + run_migrations_online() diff --git a/alembic/script.py.mako b/alembic/script.py.mako new file mode 100644 index 0000000..55df286 --- /dev/null +++ b/alembic/script.py.mako @@ -0,0 +1,24 @@ +"""${message} + +Revision ID: ${up_revision} +Revises: ${down_revision | comma,n} +Create Date: ${create_date} + +""" +from alembic import op +import sqlalchemy as sa +${imports if imports else ""} + +# revision identifiers, used by Alembic. +revision = ${repr(up_revision)} +down_revision = ${repr(down_revision)} +branch_labels = ${repr(branch_labels)} +depends_on = ${repr(depends_on)} + + +def upgrade() -> None: + ${upgrades if upgrades else "pass"} + + +def downgrade() -> None: + ${downgrades if downgrades else "pass"} diff --git a/app/api/__init__.py b/app/api/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/api/v1/__init__.py b/app/api/v1/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/database.py b/app/database.py new file mode 100644 index 0000000..8663086 --- /dev/null +++ b/app/database.py @@ -0,0 +1,61 @@ +"""Database configuration and session management.""" + +import logging +import os + +from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine +from sqlalchemy.orm import declarative_base + +logger = logging.getLogger(__name__) + +# Database URL from environment variables +# Priority: DATABASE_URL > constructed from individual variables +DATABASE_URL = os.getenv("DATABASE_URL") +if not DATABASE_URL: + db_host = os.getenv("DATABASE_HOST") + db_port = os.getenv("DATABASE_PORT") + db_user = 
os.getenv("DATABASE_USER") + db_pass = os.getenv("DATABASE_PASSWORD") + db_name = os.getenv("DATABASE_NAME") + + missing_vars = [ + name + for name, value in [ + ("DATABASE_HOST", db_host), + ("DATABASE_PORT", db_port), + ("DATABASE_USER", db_user), + ("DATABASE_PASSWORD", db_pass), + ("DATABASE_NAME", db_name), + ] + if not value + ] + + if missing_vars: + raise RuntimeError( + f"Database configuration is incomplete. Missing environment variables: {', '.join(missing_vars)}" + ) + + DATABASE_URL = f"postgresql+psycopg://{db_user}:{db_pass}@{db_host}:{db_port}/{db_name}" + +# Create SQLAlchemy async engine +engine = create_async_engine( + DATABASE_URL, + pool_pre_ping=True, + pool_size=10, + max_overflow=20, +) +# Async session factory +SessionLocal: async_sessionmaker[AsyncSession] = async_sessionmaker( + autocommit=False, + autoflush=False, + expire_on_commit=False, + bind=engine, +) +# Base class for models +Base = declarative_base() + + +async def get_db(): + """Dependency for FastAPI to get async database session.""" + async with SessionLocal() as db: + yield db diff --git a/app/main.py b/app/main.py index ecd8357..a114ba2 100644 --- a/app/main.py +++ b/app/main.py @@ -1,21 +1,51 @@ import json import os -from pathlib import Path import traceback -from typing import Any, Dict import uuid -from fastapi.responses import JSONResponse +from pathlib import Path +from typing import Any + from fastapi import FastAPI, HTTPException +from fastapi.responses import JSONResponse from memu.app import MemoryService -app = FastAPI() -service = MemoryService(llm_config={"api_key": os.getenv("OPENAI_API_KEY")}) +app = FastAPI(title="memU Server", version="0.1.0") -storage_dir = Path(os.getenv("MEMU_STORAGE_DIR", "./data")) +# Ensure required environment variables are set +openai_api_key = os.getenv("OPENAI_API_KEY") +if not openai_api_key: + raise RuntimeError( + "OPENAI_API_KEY environment variable is not set or is empty. 
" + "Set OPENAI_API_KEY to a valid OpenAI API key before starting the server." + ) + +# Initialize MemoryService with proper configuration +database_url = os.getenv("DATABASE_URL") +if not database_url: + raise RuntimeError( + "DATABASE_URL environment variable is not set. " + "Please set DATABASE_URL to your PostgreSQL connection string. " + "Example: postgresql+psycopg://user:pass@localhost:54320/dbname" + ) + +service = MemoryService( + llm_profiles={ + "default": { + "provider": "openai", + "api_key": openai_api_key, + "base_url": os.getenv("OPENAI_BASE_URL", "https://api.openai.com/v1"), + "model": os.getenv("DEFAULT_LLM_MODEL", "gpt-4o-mini"), + } + }, + database_config={"url": database_url}, +) + +storage_dir = Path(os.getenv("STORAGE_PATH", "./data")) storage_dir.mkdir(parents=True, exist_ok=True) + @app.post("/memorize") -async def memorize(payload: Dict[str, Any]): +async def memorize(payload: dict[str, Any]): try: file_path = storage_dir / f"conversation-{uuid.uuid4().hex}.json" with file_path.open("w", encoding="utf-8") as f: @@ -25,17 +55,18 @@ async def memorize(payload: Dict[str, Any]): return JSONResponse(content={"status": "success", "result": result}) except Exception as exc: traceback.print_exc() - raise HTTPException(status_code=500, detail=str(exc)) + raise HTTPException(status_code=500, detail=str(exc)) from exc + @app.post("/retrieve") -async def retrieve(payload: Dict[str, Any]): +async def retrieve(payload: dict[str, Any]): if "query" not in payload: raise HTTPException(status_code=400, detail="Missing 'query' in request body") try: result = await service.retrieve([payload["query"]]) return JSONResponse(content={"status": "success", "result": result}) except Exception as exc: - raise HTTPException(status_code=500, detail=str(exc)) + raise HTTPException(status_code=500, detail=str(exc)) from exc @app.get("/") diff --git a/app/models/__init__.py b/app/models/__init__.py new file mode 100644 index 0000000..e69de29 diff --git 
a/app/services/__init__.py b/app/services/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/utils/__init__.py b/app/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/workers/__init__.py b/app/workers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/config/__init__.py b/config/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..c9a1c49 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,49 @@ +services: + # PostgreSQL with pgvector extension + postgres: + image: pgvector/pgvector:pg16 + container_name: memu-postgres + environment: + POSTGRES_USER: ${POSTGRES_USER:-memu_user} + POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-memu_pass} + POSTGRES_DB: ${POSTGRES_DB:-memu_db} + ports: + - "54320:5432" + volumes: + - postgres-data:/var/lib/postgresql/data + healthcheck: + test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-memu_user}"] + interval: 5s + timeout: 5s + retries: 5 + networks: + - memu-network + + # Temporal server + temporal: + image: temporalio/auto-setup:latest + container_name: memu-temporal + depends_on: + postgres: + condition: service_healthy + environment: + - DB=postgresql + - DB_PORT=5432 + - POSTGRES_USER=memu_user + - POSTGRES_PWD=memu_pass + - POSTGRES_SEEDS=postgres + - POSTGRES_DB=temporal + - DYNAMIC_CONFIG_FILE_PATH=config/dynamicconfig/development-sql.yaml + ports: + - "17233:7233" # Temporal gRPC + - "18233:8233" # Temporal Web UI + networks: + - memu-network + +volumes: + postgres-data: + driver: local + +networks: + memu-network: + driver: bridge diff --git a/pyproject.toml b/pyproject.toml index fa88439..6519a74 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,15 +1,94 @@ [project] name = "memu-server" version = "0.1.0" -description = "backend wrapper for memU" -readme = "README.md" -requires-python = ">=3.14" +description = "Standalone memory service with memorize and retrieve" 
+requires-python = ">=3.13" + dependencies = [ - "fastapi[standard]>=0.122.0", - "memu-py==0.6.0", + # Web Framework + "fastapi[standard]>=0.121.0", + "uvicorn[standard]>=0.35.0", + + # Database & ORM + "sqlmodel>=0.0.27", + "sqlalchemy[asyncio]>=2.0.41", + "psycopg[binary,pool]>=3.2.9", + "alembic>=1.16.2", + "pgvector>=0.3.2", + + # Workflow Engine + "temporalio==1.16.0", + + # Memory Engine + "memu-py[postgres]>=1.2.0", + + # LLM & AI + "openai>=1.54.4", + + # Configuration & Utils + "pydantic-settings>=2.10.1", + "python-dotenv>=1.0.0", + "pendulum>=3.1.0", ] [dependency-groups] dev = [ - "requests>=2.32.5", + "pytest>=7.2.0", + "pytest-asyncio>=0.23.0", + "pytest-cov>=6.0.0", + "httpx>=0.27.0", + "ruff>=0.11.0", + "pre-commit>=4.0.0", + "bandit[toml]>=1.8.0", + "pylint>=3.0.0", + "mypy>=1.8.0", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.hatch.build.targets.wheel] +packages = ["app"] + +[tool.ruff] +line-length = 120 +target-version = "py313" + +[tool.ruff.lint] +# Enable import sorting checks +select = [ + "E", # pycodestyle errors + "W", # pycodestyle warnings + "F", # pyflakes + "I", # isort (import sorting) + "UP", # pyupgrade + "B", # flake8-bugbear ] +ignore = [] + +[tool.ruff.lint.isort] +known-first-party = ["app", "config"] +section-order = ["future", "standard-library", "third-party", "first-party", "local-folder"] + +[tool.pytest.ini_options] +asyncio_mode = "auto" +testpaths = ["tests"] + +[tool.bandit] +exclude_dirs = [".venv", "venv", "tests", ".git", "__pycache__"] +# B105: hardcoded_password_string +# B106: hardcoded_password_funcarg +# B107: hardcoded_password_default +skips = [] + +[tool.pylint.messages_control] +disable = ["C0111", "C0103"] # missing-docstring, invalid-name +enable = ["W0102", "W0212", "W0611"] # dangerous-default-value, protected-access, unused-import + +[tool.mypy] +python_version = "3.13" +warn_return_any = true +warn_unused_configs = true +disallow_untyped_defs = false # 
Gradually enable +ignore_missing_imports = true diff --git a/storage/.gitkeep b/storage/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_env_validation.py b/tests/test_env_validation.py new file mode 100644 index 0000000..d8da597 --- /dev/null +++ b/tests/test_env_validation.py @@ -0,0 +1,133 @@ +"""Test environment variable validation.""" + +import os +import sys + +import pytest + + +def test_app_requires_openai_api_key(): + """Test that app refuses to start when OPENAI_API_KEY is not set.""" + # Save original value + original_key = os.environ.get("OPENAI_API_KEY") + + try: + # Remove the environment variable + if "OPENAI_API_KEY" in os.environ: + del os.environ["OPENAI_API_KEY"] + + # Remove app.main from sys.modules to force reimport + if "app.main" in sys.modules: + del sys.modules["app.main"] + + # Importing app should raise RuntimeError + with pytest.raises(RuntimeError, match="OPENAI_API_KEY environment variable is not set or is empty"): + from app.main import app # noqa: F401 + + finally: + # Restore original value + if original_key: + os.environ["OPENAI_API_KEY"] = original_key + # Clean up module cache + if "app.main" in sys.modules: + del sys.modules["app.main"] + + +def test_app_refuses_empty_openai_api_key(): + """Test that app refuses to start when OPENAI_API_KEY is empty.""" + # Save original value + original_key = os.environ.get("OPENAI_API_KEY") + + try: + # Set to empty string + os.environ["OPENAI_API_KEY"] = "" + + # Remove app.main from sys.modules to force reimport + if "app.main" in sys.modules: + del sys.modules["app.main"] + + # Importing app should raise RuntimeError + with pytest.raises(RuntimeError, match="OPENAI_API_KEY environment variable is not set or is empty"): + from app.main import app # noqa: F401 + + finally: + # Restore original value + if original_key: + os.environ["OPENAI_API_KEY"] = original_key + else: 
+ if "OPENAI_API_KEY" in os.environ: + del os.environ["OPENAI_API_KEY"] + # Clean up module cache + if "app.main" in sys.modules: + del sys.modules["app.main"] + + +def test_app_requires_database_url(): + """Test that app refuses to start when DATABASE_URL is not set.""" + # Save original values + original_key = os.environ.get("OPENAI_API_KEY") + original_db = os.environ.get("DATABASE_URL") + + try: + # Set valid API key but no DATABASE_URL + os.environ["OPENAI_API_KEY"] = "test-key" + if "DATABASE_URL" in os.environ: + del os.environ["DATABASE_URL"] + + # Remove app.main from sys.modules to force reimport + if "app.main" in sys.modules: + del sys.modules["app.main"] + + # Importing app should raise RuntimeError + with pytest.raises(RuntimeError, match="DATABASE_URL environment variable is not set"): + from app.main import app # noqa: F401 + + finally: + # Restore original values + if original_key: + os.environ["OPENAI_API_KEY"] = original_key + if original_db: + os.environ["DATABASE_URL"] = original_db + # Clean up module cache + if "app.main" in sys.modules: + del sys.modules["app.main"] + + +def test_app_starts_with_valid_openai_api_key(): + """Test that app starts successfully with valid OPENAI_API_KEY.""" + # Save original value + original_key = os.environ.get("OPENAI_API_KEY") + original_db = os.environ.get("DATABASE_URL") + + try: + # Set valid key and database URL + os.environ["OPENAI_API_KEY"] = "test-valid-key" + os.environ["DATABASE_URL"] = "postgresql+psycopg://test_user:test_pass@localhost:54320/test_db" + + # Remove app.main from sys.modules to force reimport + if "app.main" in sys.modules: + del sys.modules["app.main"] + + # Should not raise + from app.main import app + + assert app is not None + assert app.title == "memU Server" + + finally: + # Restore original value + if original_key: + os.environ["OPENAI_API_KEY"] = original_key + else: + if "OPENAI_API_KEY" in os.environ: + del os.environ["OPENAI_API_KEY"] + + if original_db: + 
os.environ["DATABASE_URL"] = original_db + else: + if "DATABASE_URL" in os.environ: + del os.environ["DATABASE_URL"] + + # Clean up module cache + if "app.main" in sys.modules: + del sys.modules["app.main"] diff --git a/tests/test_health.py b/tests/test_health.py new file mode 100644 index 0000000..8d1d292 --- /dev/null +++ b/tests/test_health.py @@ -0,0 +1,22 @@ +"""Basic health check tests.""" + +import os + +from fastapi.testclient import TestClient + +# Set required environment variables for testing before importing app +os.environ["OPENAI_API_KEY"] = "test-key-for-testing" +os.environ["DATABASE_URL"] = "postgresql+psycopg://test_user:test_pass@localhost:54320/test_db" + +from app.main import app # noqa: E402 # pylint: disable=wrong-import-position + +client = TestClient(app) + + +def test_root_endpoint(): + """Test root endpoint returns welcome message.""" + response = client.get("/") + assert response.status_code == 200 + data = response.json() + assert "message" in data + assert data["message"] == "Hello MemU user!"