diff --git a/alembic.ini b/alembic.ini new file mode 100644 index 0000000..db5a06f --- /dev/null +++ b/alembic.ini @@ -0,0 +1,124 @@ +# A generic, single database configuration. + +[alembic] +# path to migration scripts +script_location = alembic + +# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s +# Uncomment the line below if you want the files to be prepended with date and time +# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s + +# sys.path path, will be prepended to sys.path if present. +# defaults to the current working directory. +prepend_sys_path = . + +# timezone to use when rendering the date within the migration file +# as well as the filename. +# If specified, requires the python>=3.9 or backports.zoneinfo library. +# Any required deps can be installed by adding `alembic[tz]` to the pip requirements +# string value is passed to ZoneInfo() +# leave blank for localtime +# timezone = + +# max length of characters to apply to the "slug" field +# truncate_slug_length = 40 + +# set to 'true' to run the environment during +# the 'revision' command, regardless of autogenerate +# revision_environment = false + +# set to 'true' to allow .pyc and .pyo files without +# a source .py file to be detected as revisions in the +# versions/ directory +# sourceless = false + +# version location specification; This defaults +# to alembic/versions. When using multiple version +# directories, initial revisions must be specified with --version-path. +# The path separator used here should be the separator specified by "version_path_separator" below. +# version_locations = %(here)s/bar:%(here)s/bat:alembic/versions + +# version path separator; As mentioned above, this is the character used to split +# version_locations. The default within new alembic.ini files is "os", which uses os.pathsep. 
+# If this key is omitted entirely, it falls back to the legacy behavior of splitting on spaces and/or commas.
+# Valid values for version_path_separator are:
+#
+# version_path_separator = :
+# version_path_separator = ;
+# version_path_separator = space
+version_path_separator = os  # Use os.pathsep. Default configuration used for new projects.
+
+# set to 'true' to search source files recursively
+# in each "version_locations" directory
+# new in Alembic version 1.10
+# recursive_version_locations = false
+
+# the output encoding used when revision files
+# are written from script.py.mako
+# output_encoding = utf-8
+
+# Database URL Configuration
+#
+# Note: This project reads the database URL from environment variables in alembic/env.py.
+# The sqlalchemy.url setting is intentionally left unset here.
+#
+# Supported environment variables (in priority order):
+# 1) DATABASE_URL - Full connection URL (e.g., postgresql+psycopg://user:pass@host:5432/db)
+# 2) Individual variables: DATABASE_HOST, DATABASE_PORT, DATABASE_USER, DATABASE_PASSWORD, DATABASE_NAME
+#
+# The env.py automatically normalizes URL schemes (postgres://, postgresql://, postgresql+asyncpg://)
+# to postgresql+psycopg:// for Alembic migrations.
+#
+# sqlalchemy.url =
+
+[post_write_hooks]
+# post_write_hooks defines scripts or Python functions that are run
+# on newly generated revision scripts. See the documentation for further
+# detail and examples
+
+# format using "black" - use the console_scripts runner, against the "black" entrypoint
+# hooks = black
+# black.type = console_scripts
+# black.entrypoint = black
+# black.options = -l 79 REVISION_SCRIPT_FILENAME
+
+# lint with attempts to fix using "ruff" - use the exec runner, execute a binary
+# hooks = ruff
+# ruff.type = exec
+# ruff.executable = %(here)s/.venv/bin/ruff
+# ruff.options = --fix REVISION_SCRIPT_FILENAME
+
+# Logging configuration
+[loggers]
+keys = root,sqlalchemy,alembic
+
+[handlers]
+keys = console
+
+[formatters]
+keys = generic
+
+[logger_root]
+level = WARN
+handlers = console
+qualname =
+
+[logger_sqlalchemy]
+level = WARN
+handlers =
+qualname = sqlalchemy.engine
+
+[logger_alembic]
+level = INFO
+handlers =
+qualname = alembic
+
+[handler_console]
+class = StreamHandler
+args = (sys.stderr,)
+level = NOTSET
+formatter = generic
+
+[formatter_generic]
+format = %(levelname)-5.5s [%(name)s] %(message)s
+datefmt = %H:%M:%S
diff --git a/alembic/env.py b/alembic/env.py
new file mode 100644
index 0000000..67b02d5
--- /dev/null
+++ b/alembic/env.py
@@ -0,0 +1,152 @@
+"""Alembic migration environment configuration."""
+
+import os
+
+# pylint: disable=no-member
+from logging.config import fileConfig
+from urllib.parse import quote
+
+from sqlalchemy import pool
+
+from alembic import context
+
+# Import Base metadata for autogenerate support
+# Note: We import from app.models.base which is side-effect free
+# (doesn't create database connections or read environment variables)
+from app.models.base import Base
+
+
+def get_sync_database_url() -> str:
+    """
+    Get synchronous database URL for Alembic migrations.
+
+    Alembic uses synchronous database connections, so we need to convert
+    the async URL (postgresql+asyncpg://) to sync format (postgresql+psycopg://).
+
+    Returns:
+        str: Synchronous database connection URL
+    """
+    # First try DATABASE_URL from environment
+    database_url = os.getenv("DATABASE_URL")
+    if database_url:
+        # Normalize common PostgreSQL DSNs to use the psycopg (sync) driver.
+        # Handle bare postgres:// and postgresql:// URLs that don't specify a driver.
+        if database_url.startswith("postgres://"):
+            # postgres://user:pass@host/db -> postgresql+psycopg://user:pass@host/db
+            database_url = "postgresql+psycopg://" + database_url[len("postgres://") :]
+        elif database_url.startswith("postgresql://") and not database_url.startswith("postgresql+"):
+            # postgresql://user:pass@host/db -> postgresql+psycopg://user:pass@host/db
+            database_url = "postgresql+psycopg://" + database_url[len("postgresql://") :]
+
+        # Convert async driver to sync driver if needed
+        # postgresql+asyncpg:// -> postgresql+psycopg://
+        if database_url.startswith("postgresql+asyncpg://"):
+            database_url = "postgresql+psycopg://" + database_url[len("postgresql+asyncpg://") :]
+
+        return database_url
+
+    # Construct from individual variables
+    db_host = os.getenv("DATABASE_HOST")
+    db_port = os.getenv("DATABASE_PORT", "5432")  # Default PostgreSQL port
+    db_user = os.getenv("DATABASE_USER")
+    db_pass = os.getenv("DATABASE_PASSWORD")
+    db_name = os.getenv("DATABASE_NAME")
+
+    # Validate required environment variables (consistent with app/database.py)
+    missing_vars = [
+        name
+        for name, value in [
+            ("DATABASE_HOST", db_host),
+            ("DATABASE_USER", db_user),
+            ("DATABASE_PASSWORD", db_pass),
+            ("DATABASE_NAME", db_name),
+        ]
+        if not value
+    ]
+
+    if missing_vars:
+        raise RuntimeError(
+            f"Database configuration is incomplete. Missing environment variables: {', '.join(missing_vars)}"
+        )
+
+    # At this point, we know these are not None
+    assert db_host is not None
+    assert db_user is not None
+    assert db_pass is not None
+    assert db_name is not None
+
+    # URL-encode username and password to handle special characters
+    # Use quote(..., safe="") instead of quote_plus() for URL userinfo section
+    db_user_encoded = quote(db_user, safe="")
+    db_pass_encoded = quote(db_pass, safe="")
+
+    # Use psycopg (sync) for Alembic migrations
+    return f"postgresql+psycopg://{db_user_encoded}:{db_pass_encoded}@{db_host}:{db_port}/{db_name}"
+
+
+# this is the Alembic Config object, which provides
+# access to the values within the .ini file in use.
+config = context.config
+
+# Interpret the config file for Python logging.
+# This line sets up loggers basically.
+if config.config_file_name is not None:
+    fileConfig(config.config_file_name)
+
+target_metadata = Base.metadata
+
+# other values from the config, defined by the needs of env.py,
+# can be acquired:
+# my_important_option = config.get_main_option("my_important_option")
+# ... etc.
+
+
+def run_migrations_offline() -> None:
+    """Run migrations in 'offline' mode.
+
+    This configures the context with just a URL
+    and not an Engine, though an Engine is acceptable
+    here as well. By skipping the Engine creation
+    we don't even need a DBAPI to be available.
+
+    Calls to context.execute() here emit the given string to the
+    script output.
+
+    """
+    # Get URL from environment variables
+    url = get_sync_database_url()
+    context.configure(
+        url=url,
+        target_metadata=target_metadata,
+        literal_binds=True,
+        dialect_opts={"paramstyle": "named"},
+    )
+
+    with context.begin_transaction():
+        context.run_migrations()
+
+
+def run_migrations_online() -> None:
+    """Run migrations in 'online' mode.
+
+    In this scenario we need to create an Engine
+    and associate a connection with the context.
+ + """ + from sqlalchemy import create_engine + + # Get URL from environment variables and create engine directly + url = get_sync_database_url() + connectable = create_engine(url, poolclass=pool.NullPool) + + with connectable.connect() as connection: + context.configure(connection=connection, target_metadata=target_metadata) + + with context.begin_transaction(): + context.run_migrations() + + +if context.is_offline_mode(): + run_migrations_offline() +else: + run_migrations_online() diff --git a/alembic/script.py.mako b/alembic/script.py.mako new file mode 100644 index 0000000..55df286 --- /dev/null +++ b/alembic/script.py.mako @@ -0,0 +1,24 @@ +"""${message} + +Revision ID: ${up_revision} +Revises: ${down_revision | comma,n} +Create Date: ${create_date} + +""" +from alembic import op +import sqlalchemy as sa +${imports if imports else ""} + +# revision identifiers, used by Alembic. +revision = ${repr(up_revision)} +down_revision = ${repr(down_revision)} +branch_labels = ${repr(branch_labels)} +depends_on = ${repr(depends_on)} + + +def upgrade() -> None: + ${upgrades if upgrades else "pass"} + + +def downgrade() -> None: + ${downgrades if downgrades else "pass"} diff --git a/alembic/versions/.gitkeep b/alembic/versions/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/app/database.py b/app/database.py new file mode 100644 index 0000000..ace0607 --- /dev/null +++ b/app/database.py @@ -0,0 +1,107 @@ +"""Database configuration and session management.""" + +import os +from urllib.parse import quote + +from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine + +from app.models.base import Base + + +def get_database_url() -> str: + """ + Get database URL from environment variables. 
+
+    Priority: DATABASE_URL > constructed from individual variables
+
+    Returns:
+        str: Database connection URL
+
+    Raises:
+        RuntimeError: If required environment variables are missing
+    """
+    database_url = os.getenv("DATABASE_URL")
+    if database_url:
+        # Normalize common PostgreSQL DSNs to use the psycopg async driver.
+        # Handle bare postgres:// and postgresql:// URLs that don't specify a driver.
+        if database_url.startswith("postgres://"):
+            # postgres://user:pass@host/db -> postgresql+psycopg://user:pass@host/db
+            database_url = "postgresql+psycopg://" + database_url[len("postgres://") :]
+        elif database_url.startswith("postgresql://") and not database_url.startswith("postgresql+"):
+            # postgresql://user:pass@host/db -> postgresql+psycopg://user:pass@host/db
+            database_url = "postgresql+psycopg://" + database_url[len("postgresql://") :]
+
+        # Convert asyncpg driver to psycopg if needed (asyncpg is not a project dependency)
+        # postgresql+asyncpg:// -> postgresql+psycopg://
+        if database_url.startswith("postgresql+asyncpg://"):
+            database_url = "postgresql+psycopg://" + database_url[len("postgresql+asyncpg://") :]
+
+        return database_url
+
+    # Construct from individual variables
+    db_host = os.getenv("DATABASE_HOST")
+    db_port = os.getenv("DATABASE_PORT", "5432")  # Default PostgreSQL port
+    db_user = os.getenv("DATABASE_USER")
+    db_pass = os.getenv("DATABASE_PASSWORD")
+    db_name = os.getenv("DATABASE_NAME")
+
+    # TODO: Improve validation to check for empty strings explicitly
+    # Current check 'if not value' treats empty string as missing
+    missing_vars = [
+        name
+        for name, value in [
+            ("DATABASE_HOST", db_host),
+            ("DATABASE_USER", db_user),
+            ("DATABASE_PASSWORD", db_pass),
+            ("DATABASE_NAME", db_name),
+        ]
+        if not value
+    ]
+
+    if missing_vars:
+        raise RuntimeError(
+            f"Database configuration is incomplete. Missing environment variables: {', '.join(missing_vars)}"
+        )
+
+    # At this point, we know db_user, db_pass, db_host, db_name are not None
+    # Use assertion to help mypy understand this
+    assert db_user is not None
+    assert db_pass is not None
+    assert db_host is not None
+    assert db_name is not None
+
+    # URL-encode username and password to handle special characters like '@', ':', '/'
+    # Use quote(..., safe="") instead of quote_plus() for URL userinfo section
+    db_user_encoded = quote(db_user, safe="")
+    db_pass_encoded = quote(db_pass, safe="")
+
+    return f"postgresql+psycopg://{db_user_encoded}:{db_pass_encoded}@{db_host}:{db_port}/{db_name}"
+
+
+# TODO: Consider lazy initialization to avoid executing during module import
+# This would prevent database connection issues from failing tests that don't use the database
+# Get database URL using the shared function
+DATABASE_URL = get_database_url()
+
+# Create SQLAlchemy async engine
+engine = create_async_engine(
+    DATABASE_URL,
+    pool_pre_ping=True,
+    pool_size=10,
+    max_overflow=20,
+)
+# Async session factory
+# Note: autocommit is removed as it's not supported in SQLAlchemy 2.x async_sessionmaker
+SessionLocal: async_sessionmaker[AsyncSession] = async_sessionmaker(
+    autoflush=False,
+    expire_on_commit=False,
+    bind=engine,
+)
+# Re-export Base for backward compatibility
+__all__ = ["Base", "SessionLocal", "engine", "get_db", "get_database_url"]
+
+
+async def get_db():
+    """Dependency for FastAPI to get async database session."""
+    async with SessionLocal() as db:
+        yield db
diff --git a/app/main.py b/app/main.py
index 75ae299..b59db47 100644
--- a/app/main.py
+++ b/app/main.py
@@ -9,10 +9,36 @@
 from fastapi.responses import JSONResponse
 
 from memu.app import MemoryService
 
-app = FastAPI()
-service = MemoryService(llm_config={"api_key": os.getenv("OPENAI_API_KEY")})
+from app.database import get_database_url
 
-storage_dir = Path(os.getenv("MEMU_STORAGE_DIR", "./data"))
+app = FastAPI(title="memU Server", version="0.1.0")
+
+# Ensure required environment variables are set
+openai_api_key = os.getenv("OPENAI_API_KEY")
+if not openai_api_key:
+    raise RuntimeError(
+        "OPENAI_API_KEY environment variable is not set or is empty. "
+        "Set OPENAI_API_KEY to a valid OpenAI API key before starting the server."
+    )
+
+# Get database URL using shared configuration utility
+database_url = get_database_url()
+
+service = MemoryService(
+    llm_profiles={
+        "default": {
+            "provider": "openai",
+            "api_key": openai_api_key,
+            "base_url": os.getenv("OPENAI_BASE_URL", "https://api.openai.com/v1"),
+            "model": os.getenv("DEFAULT_LLM_MODEL", "gpt-4o-mini"),
+        }
+    },
+    database_config={"url": database_url},
+)
+
+# Storage directory for conversation files
+# Support both new STORAGE_PATH and legacy MEMU_STORAGE_DIR for backward compatibility
+storage_dir = Path(os.getenv("STORAGE_PATH") or os.getenv("MEMU_STORAGE_DIR") or "./data")
 storage_dir.mkdir(parents=True, exist_ok=True)
diff --git a/app/models/base.py b/app/models/base.py
new file mode 100644
index 0000000..a0916be
--- /dev/null
+++ b/app/models/base.py
@@ -0,0 +1,14 @@
+"""SQLAlchemy Base class for model definitions.
+
+This module is intentionally side-effect-free - it only defines the Base class
+without creating any database connections or reading environment variables.
+This allows safe imports from alembic/env.py for migration autogeneration.
+"""
+
+from sqlalchemy.orm import DeclarativeBase
+
+
+class Base(DeclarativeBase):
+    """Base class for all SQLAlchemy models."""
+
+    pass
diff --git a/pyproject.toml b/pyproject.toml
index f09729d..7ef794a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -121,6 +121,9 @@ DEP002 = [
     "python-dotenv",  # Environment variables (future use)
     "pendulum",  # Date/time handling (future use)
 ]
+DEP003 = [
+    "app",  # Project's own package, not an external dependency
+]
 
 [tool.mypy]
 python_version = "3.13"