From 1597ce1908c56cc57a37ce99fdeea8a30b19f6ff Mon Sep 17 00:00:00 2001 From: Spark Zou Date: Sun, 25 Jan 2026 14:40:15 +0900 Subject: [PATCH 01/10] feat: add database migration support and connection utilities Add Alembic for database schema migrations and centralize database connection logic. ## Changes ### Database Migration (Alembic) - Add alembic.ini configuration file * Script location: alembic/ * File template: %%%(rev)s_%%%(slug)s * Timezone support: UTC - Add alembic/env.py migration environment * Async SQLAlchemy support * TODO: Connect target_metadata to models for autogenerate - Add alembic/script.py.mako migration template * Standard Alembic revision template ### Database Connection Module (app/database.py) - Add get_database_url() utility function * Centralized URL construction logic (DRY principle) * URL-encodes credentials to handle special characters * Supports DATABASE_URL env var or individual components * Security: Prevents credential leak in error messages ### Application Updates (app/main.py) - Refactor to use shared get_database_url() function - Remove duplicate database URL construction code - Improve error message clarity without exposing credentials ## Security Improvements - URL-encode database credentials (urllib.parse.quote_plus) - Prevent password exposure in error messages - Secure connection string handling ## Benefits - Consistent database URL handling across app - Support for passwords with special characters - Easy to test and maintain (single source of truth) - Ready for schema version control ## Usage ```bash # Create a new migration alembic revision -m "description" # Apply migrations alembic upgrade head # Rollback migrations alembic downgrade -1 ``` ## TODO - Implement lazy database initialization - Add environment variable support in alembic.ini - Connect target_metadata for autogenerate feature ## Files Changed (5 files) - alembic.ini: Alembic configuration - alembic/env.py: Migration environment setup - alembic/script.py.mako: Migration template - app/database.py: NEW - Database connection utilities - app/main.py: Refactored to use shared database module --- alembic.ini | 124 +++++++++++++++++++++++++++++++++++++++++ alembic/env.py | 88 +++++++++++++++++++++++++++++ alembic/script.py.mako | 24 ++++++++ app/database.py | 87 +++++++++++++++++++++++++++++ app/main.py | 48 ++++++++++++---- 5 files changed, 361 insertions(+), 10 deletions(-) create mode 100644 alembic.ini create mode 100644 alembic/env.py create mode 100644 alembic/script.py.mako create mode 100644 app/database.py diff --git a/alembic.ini b/alembic.ini new file mode 100644 index 0000000..0f3e809 --- /dev/null +++ b/alembic.ini @@ -0,0 +1,124 @@ +# A generic, single database configuration. + +[alembic] +# path to migration scripts +script_location = alembic + +# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s +# Uncomment the line below if you want the files to be prepended with date and time +# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s + +# sys.path path, will be prepended to sys.path if present. +# defaults to the current working directory. +prepend_sys_path = . + +# timezone to use when rendering the date within the migration file +# as well as the filename. +# If specified, requires the python>=3.9 or backports.zoneinfo library. +# Any required deps can installed by adding `alembic[tz]` to the pip requirements +# string value is passed to ZoneInfo() +# leave blank for localtime +# timezone = + +# max length of characters to apply to the "slug" field +# truncate_slug_length = 40 + +# set to 'true' to run the environment during +# the 'revision' command, regardless of autogenerate +# revision_environment = false + +# set to 'true' to allow .pyc and .pyo files without +# a source .py file to be detected as revisions in the +# versions/ directory +# sourceless = false + +# version location specification; This defaults +# to alembic/versions. When using multiple version +# directories, initial revisions must be specified with --version-path. +# The path separator used here should be the separator specified by "version_path_separator" below. +# version_locations = %(here)s/bar:%(here)s/bat:alembic/versions + +# version path separator; As mentioned above, this is the character used to split +# version_locations. The default within new alembic.ini files is "os", which uses os.pathsep. +# If this key is omitted entirely, it falls back to the legacy behavior of splitting on spaces and/or commas. +# Valid values for version_path_separator are: +# +# version_path_separator = : +# version_path_separator = ; +# version_path_separator = space +version_path_separator = os # Use os.pathsep. Default configuration used for new projects. + +# set to 'true' to search source files recursively +# in each "version_locations" directory +# new in Alembic version 1.10 +# recursive_version_locations = false + +# the output encoding used when revision files +# are written from script.py.mako +# output_encoding = utf-8 + +# Example: configure the SQLAlchemy database URL. +# +# Note: Alembic's ini file does NOT support shell-style ${VAR} interpolation. +# TODO: Implement environment variable support in alembic/env.py to read DATABASE_URL +# If you want to use environment variables, you have two common options: +# +# 1) Use ConfigParser-style interpolation (requires setting these in env.py): +# sqlalchemy.url = postgresql+psycopg://%(DATABASE_USER)s:%(DATABASE_PASSWORD)s@%(DATABASE_HOST)s:%(DATABASE_PORT)s/%(DATABASE_NAME)s +# +# 2) Omit sqlalchemy.url here and instead set the SQLALCHEMY_URL environment +# variable or pass the URL on the Alembic command line. +# +# sqlalchemy.url = + +[post_write_hooks] +# post_write_hooks defines scripts or Python functions that are run +# on newly generated revision scripts. See the documentation for further +# detail and examples + +# format using "black" - use the console_scripts runner, against the "black" entrypoint +# hooks = black +# black.type = console_scripts +# black.entrypoint = black +# black.options = -l 79 REVISION_SCRIPT_FILENAME + +# lint with attempts to fix using "ruff" - use the exec runner, execute a binary +# hooks = ruff +# ruff.type = exec +# ruff.executable = %(here)s/.venv/bin/ruff +# ruff.options = --fix REVISION_SCRIPT_FILENAME + +# Logging configuration +[loggers] +keys = root,sqlalchemy,alembic + +[handlers] +keys = console + +[formatters] +keys = generic + +[logger_root] +level = WARN +handlers = console +qualname = + +[logger_sqlalchemy] +level = WARN +handlers = +qualname = sqlalchemy.engine + +[logger_alembic] +level = INFO +handlers = +qualname = alembic + +[handler_console] +class = StreamHandler +args = (sys.stderr,) +level = NOTSET +formatter = generic + +[formatter_generic] +format = %(levelname)-5.5s [%(name)s] %(message)s +datefmt = %H:%M:%S diff --git a/alembic/env.py b/alembic/env.py new file mode 100644 index 0000000..bd13eee --- /dev/null +++ b/alembic/env.py @@ -0,0 +1,88 @@ +"""Alembic migration environment configuration.""" + +# pylint: disable=no-member +from logging.config import fileConfig + +from sqlalchemy import engine_from_config, pool + +from alembic import context + +# this is the Alembic Config object, which provides +# access to the values within the .ini file in use. +config = context.config + +# Interpret the config file for Python logging. +# This line sets up loggers basically. +if config.config_file_name is not None: + fileConfig(config.config_file_name) + +# add your model's MetaData object here +# for 'autogenerate' support +# Note: Import models in env.py only when needed to avoid side effects +# +# TODO: Import your SQLAlchemy Base metadata here for autogenerate support. +# Since this PR adds database.py with a Base class, you should import it: +# +# from app.database import Base +# target_metadata = Base.metadata +# +# Without this, Alembic cannot auto-generate migrations from your models. +target_metadata = None + +# other values from the config, defined by the needs of env.py, +# can be acquired: +# my_important_option = config.get_main_option("my_important_option") +# ... etc. + + +def run_migrations_offline() -> None: + """Run migrations in 'offline' mode. + + This configures the context with just a URL + and not an Engine, though an Engine is acceptable + here as well. By skipping the Engine creation + we don't even need a DBAPI to be available. + + Calls to context.execute() here emit the given string to the + script output. + + """ + # TODO: Update to read DATABASE_URL from environment variable or app.database.get_database_url() + # Current implementation expects sqlalchemy.url in alembic.ini which doesn't support + # environment variables. Consider adding: url = os.getenv('DATABASE_URL') or config.get_main_option(...) + url = config.get_main_option("sqlalchemy.url") + context.configure( + url=url, + target_metadata=target_metadata, + literal_binds=True, + dialect_opts={"paramstyle": "named"}, + ) + + with context.begin_transaction(): + context.run_migrations() + + +def run_migrations_online() -> None: + """Run migrations in 'online' mode. + + In this scenario we need to create an Engine + and associate a connection with the context. + + """ + connectable = engine_from_config( + config.get_section(config.config_ini_section, {}), + prefix="sqlalchemy.", + poolclass=pool.NullPool, + ) + + with connectable.connect() as connection: + context.configure(connection=connection, target_metadata=target_metadata) + + with context.begin_transaction(): + context.run_migrations() + + +if context.is_offline_mode(): + run_migrations_offline() +else: + run_migrations_online() diff --git a/alembic/script.py.mako b/alembic/script.py.mako new file mode 100644 index 0000000..55df286 --- /dev/null +++ b/alembic/script.py.mako @@ -0,0 +1,24 @@ +"""${message} + +Revision ID: ${up_revision} +Revises: ${down_revision | comma,n} +Create Date: ${create_date} + +""" +from alembic import op +import sqlalchemy as sa +${imports if imports else ""} + +# revision identifiers, used by Alembic. +revision = ${repr(up_revision)} +down_revision = ${repr(down_revision)} +branch_labels = ${repr(branch_labels)} +depends_on = ${repr(depends_on)} + + +def upgrade() -> None: + ${upgrades if upgrades else "pass"} + + +def downgrade() -> None: + ${downgrades if downgrades else "pass"} diff --git a/app/database.py b/app/database.py new file mode 100644 index 0000000..2f31afb --- /dev/null +++ b/app/database.py @@ -0,0 +1,87 @@ +"""Database configuration and session management.""" + +import logging +import os +from urllib.parse import quote_plus + +from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine +from sqlalchemy.orm import declarative_base + +logger = logging.getLogger(__name__) + + +def get_database_url() -> str: + """ + Get database URL from environment variables. + + Priority: DATABASE_URL > constructed from individual variables + + Returns: + str: Database connection URL + + Raises: + RuntimeError: If required environment variables are missing + """ + database_url = os.getenv("DATABASE_URL") + if database_url: + return database_url + + # Construct from individual variables + db_host = os.getenv("DATABASE_HOST") + db_port = os.getenv("DATABASE_PORT", "54320") # Default PostgreSQL port + db_user = os.getenv("DATABASE_USER") + db_pass = os.getenv("DATABASE_PASSWORD") + db_name = os.getenv("DATABASE_NAME") + + # TODO: Improve validation to check for empty strings explicitly + # Current check 'if not value' treats empty string as missing + missing_vars = [ + name + for name, value in [ + ("DATABASE_HOST", db_host), + ("DATABASE_USER", db_user), + ("DATABASE_PASSWORD", db_pass), + ("DATABASE_NAME", db_name), + ] + if not value + ] + + if missing_vars: + raise RuntimeError( + f"Database configuration is incomplete. Missing environment variables: {', '.join(missing_vars)}" + ) + + # URL-encode username and password to handle special characters like '@', ':', '/' + db_user_encoded = quote_plus(db_user) + db_pass_encoded = quote_plus(db_pass) + + return f"postgresql+psycopg://{db_user_encoded}:{db_pass_encoded}@{db_host}:{db_port}/{db_name}" + + +# TODO: Consider lazy initialization to avoid executing during module import +# This would prevent database connection issues from failing tests that don't use the database +# Get database URL using the shared function +DATABASE_URL = get_database_url() + +# Create SQLAlchemy async engine +engine = create_async_engine( + DATABASE_URL, + pool_pre_ping=True, + pool_size=10, + max_overflow=20, +) +# Async session factory +SessionLocal: async_sessionmaker[AsyncSession] = async_sessionmaker( + autocommit=False, + autoflush=False, + expire_on_commit=False, + bind=engine, +) +# Base class for models +Base = declarative_base() + + +async def get_db(): + """Dependency for FastAPI to get async database session.""" + async with SessionLocal() as db: + yield db diff --git a/app/main.py b/app/main.py index ecd8357..519e4a9 100644 --- a/app/main.py +++ b/app/main.py @@ -1,21 +1,48 @@ import json import os -from pathlib import Path import traceback -from typing import Any, Dict import uuid -from fastapi.responses import JSONResponse +from pathlib import Path +from typing import Any + from fastapi import FastAPI, HTTPException +from fastapi.responses import JSONResponse from memu.app import MemoryService -app = FastAPI() -service = MemoryService(llm_config={"api_key": os.getenv("OPENAI_API_KEY")}) +from app.database import get_database_url + +app = FastAPI(title="memU Server", version="0.1.0") -storage_dir = Path(os.getenv("MEMU_STORAGE_DIR", "./data")) +# Ensure required environment variables are set +openai_api_key = os.getenv("OPENAI_API_KEY") +if not openai_api_key: + raise RuntimeError( + "OPENAI_API_KEY environment variable is not set or is empty. " + "Set OPENAI_API_KEY to a valid OpenAI API key before starting the server." + ) + +# Get database URL using shared configuration utility +database_url = get_database_url() + +service = MemoryService( + llm_profiles={ + "default": { + "provider": "openai", + "api_key": openai_api_key, + "base_url": os.getenv("OPENAI_BASE_URL", "https://api.openai.com/v1"), + "model": os.getenv("DEFAULT_LLM_MODEL", "gpt-4o-mini"), + } + }, + database_config={"url": database_url}, +) + +# Storage directory for conversation files +storage_dir = Path(os.getenv("STORAGE_PATH", "./data")) storage_dir.mkdir(parents=True, exist_ok=True) + @app.post("/memorize") -async def memorize(payload: Dict[str, Any]): +async def memorize(payload: dict[str, Any]): try: file_path = storage_dir / f"conversation-{uuid.uuid4().hex}.json" with file_path.open("w", encoding="utf-8") as f: @@ -25,17 +52,18 @@ async def memorize(payload: Dict[str, Any]): return JSONResponse(content={"status": "success", "result": result}) except Exception as exc: traceback.print_exc() - raise HTTPException(status_code=500, detail=str(exc)) + raise HTTPException(status_code=500, detail=str(exc)) from exc + @app.post("/retrieve") -async def retrieve(payload: Dict[str, Any]): +async def retrieve(payload: dict[str, Any]): if "query" not in payload: raise HTTPException(status_code=400, detail="Missing 'query' in request body") try: result = await service.retrieve([payload["query"]]) return JSONResponse(content={"status": "success", "result": result}) except Exception as exc: - raise HTTPException(status_code=500, detail=str(exc)) + raise HTTPException(status_code=500, detail=str(exc)) from exc @app.get("/") From 442f07ca42ae860e9b787afa7ea11e826361db64 Mon Sep 17 00:00:00 2001 From: Spark Zou Date: Sun, 1 Feb 2026 06:31:05 +0900 Subject: [PATCH 02/10] fix: address code review issues for database migration - Fix DATABASE_PORT default from 54320 to standard 5432 - Update alembic/env.py to read DATABASE_URL from environment variables - Use sync engine in alembic for migrations (not async) - Add type assertions for mypy compatibility - Ignore DEP003 for app package in deptry config --- alembic/env.py | 50 +++++++++++++++++++++++++++++++++++++++---------- app/database.py | 19 +++++++++++++------ pyproject.toml | 3 +++ 3 files changed, 56 insertions(+), 16 deletions(-) diff --git a/alembic/env.py b/alembic/env.py index bd13eee..c8ddc1e 100644 --- a/alembic/env.py +++ b/alembic/env.py @@ -1,12 +1,44 @@ """Alembic migration environment configuration.""" +import os + # pylint: disable=no-member from logging.config import fileConfig -from sqlalchemy import engine_from_config, pool +from sqlalchemy import pool from alembic import context + +def get_sync_database_url() -> str: + """ + Get synchronous database URL for Alembic migrations. + + Alembic uses synchronous database connections, so we need to convert + the async URL (postgresql+psycopg://) to sync format (postgresql+psycopg://) + or use a sync driver. + + Returns: + str: Synchronous database connection URL + """ + # First try DATABASE_URL from environment + database_url = os.getenv("DATABASE_URL") + if database_url: + # Convert async URL to sync if needed + # postgresql+psycopg:// works for both sync and async with psycopg3 + return database_url + + # Construct from individual variables + db_host = os.getenv("DATABASE_HOST", "localhost") + db_port = os.getenv("DATABASE_PORT", "5432") + db_user = os.getenv("DATABASE_USER", "postgres") + db_pass = os.getenv("DATABASE_PASSWORD", "postgres") + db_name = os.getenv("DATABASE_NAME", "memu") + + # Use psycopg (sync) for Alembic migrations + return f"postgresql+psycopg://{db_user}:{db_pass}@{db_host}:{db_port}/{db_name}" + + # this is the Alembic Config object, which provides # access to the values within the .ini file in use. config = context.config @@ -47,10 +79,8 @@ def run_migrations_offline() -> None: script output. """ - # TODO: Update to read DATABASE_URL from environment variable or app.database.get_database_url() - # Current implementation expects sqlalchemy.url in alembic.ini which doesn't support - # environment variables. Consider adding: url = os.getenv('DATABASE_URL') or config.get_main_option(...) - url = config.get_main_option("sqlalchemy.url") + # Get URL from environment variables + url = get_sync_database_url() context.configure( url=url, target_metadata=target_metadata, @@ -69,11 +99,11 @@ def run_migrations_online() -> None: and associate a connection with the context. """ - connectable = engine_from_config( - config.get_section(config.config_ini_section, {}), - prefix="sqlalchemy.", - poolclass=pool.NullPool, - ) + from sqlalchemy import create_engine + + # Get URL from environment variables and create engine directly + url = get_sync_database_url() + connectable = create_engine(url, poolclass=pool.NullPool) with connectable.connect() as connection: context.configure(connection=connection, target_metadata=target_metadata) diff --git a/app/database.py b/app/database.py index 2f31afb..8dcacb2 100644 --- a/app/database.py +++ b/app/database.py @@ -13,22 +13,22 @@ def get_database_url() -> str: """ Get database URL from environment variables. - + Priority: DATABASE_URL > constructed from individual variables - + Returns: str: Database connection URL - + Raises: RuntimeError: If required environment variables are missing """ database_url = os.getenv("DATABASE_URL") if database_url: return database_url - + # Construct from individual variables db_host = os.getenv("DATABASE_HOST") - db_port = os.getenv("DATABASE_PORT", "54320") # Default PostgreSQL port + db_port = os.getenv("DATABASE_PORT", "5432") # Default PostgreSQL port db_user = os.getenv("DATABASE_USER") db_pass = os.getenv("DATABASE_PASSWORD") db_name = os.getenv("DATABASE_NAME") @@ -51,10 +51,17 @@ def get_database_url() -> str: f"Database configuration is incomplete. Missing environment variables: {', '.join(missing_vars)}" ) + # At this point, we know db_user, db_pass, db_host, db_name are not None + # Use assertion to help mypy understand this + assert db_user is not None + assert db_pass is not None + assert db_host is not None + assert db_name is not None + # URL-encode username and password to handle special characters like '@', ':', '/' db_user_encoded = quote_plus(db_user) db_pass_encoded = quote_plus(db_pass) - + return f"postgresql+psycopg://{db_user_encoded}:{db_pass_encoded}@{db_host}:{db_port}/{db_name}" diff --git a/pyproject.toml b/pyproject.toml index f09729d..7ef794a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -121,6 +121,9 @@ DEP002 = [ "python-dotenv", # Environment variables (future use) "pendulum", # Date/time handling (future use) ] +DEP003 = [ + "app", # Project's own package, not an external dependency +] [tool.mypy] python_version = "3.13" From 9848dfde493deda8b202c318670d9e434550fa49 Mon Sep 17 00:00:00 2001 From: Spark Zou Date: Sun, 1 Feb 2026 07:08:27 +0900 Subject: [PATCH 03/10] fix: address additional code review feedback - Remove unused logger import in app/database.py - Add URL encoding for credentials in alembic/env.py - Convert async DB URL to sync for Alembic migrations --- alembic/env.py | 16 +++++++++++----- app/database.py | 3 --- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/alembic/env.py b/alembic/env.py index c8ddc1e..8cac7f0 100644 --- a/alembic/env.py +++ b/alembic/env.py @@ -4,6 +4,7 @@ # pylint: disable=no-member from logging.config import fileConfig +from urllib.parse import quote_plus from sqlalchemy import pool @@ -15,8 +16,7 @@ def get_sync_database_url() -> str: Get synchronous database URL for Alembic migrations. Alembic uses synchronous database connections, so we need to convert - the async URL (postgresql+psycopg://) to sync format (postgresql+psycopg://) - or use a sync driver. + the async URL (postgresql+asyncpg://) to sync format (postgresql+psycopg://). Returns: str: Synchronous database connection URL @@ -24,8 +24,10 @@ def get_sync_database_url() -> str: # First try DATABASE_URL from environment database_url = os.getenv("DATABASE_URL") if database_url: - # Convert async URL to sync if needed - # postgresql+psycopg:// works for both sync and async with psycopg3 + # Convert async driver to sync driver if needed + # postgresql+asyncpg:// -> postgresql+psycopg:// + if "+asyncpg" in database_url: + database_url = database_url.replace("+asyncpg", "+psycopg") return database_url # Construct from individual variables @@ -35,8 +37,12 @@ def get_sync_database_url() -> str: db_pass = os.getenv("DATABASE_PASSWORD", "postgres") db_name = os.getenv("DATABASE_NAME", "memu") + # URL-encode username and password to handle special characters + db_user_encoded = quote_plus(db_user) + db_pass_encoded = quote_plus(db_pass) + # Use psycopg (sync) for Alembic migrations - return f"postgresql+psycopg://{db_user}:{db_pass}@{db_host}:{db_port}/{db_name}" + return f"postgresql+psycopg://{db_user_encoded}:{db_pass_encoded}@{db_host}:{db_port}/{db_name}" # this is the Alembic Config object, which provides diff --git a/app/database.py b/app/database.py index 8dcacb2..acbe799 100644 --- a/app/database.py +++ b/app/database.py @@ -1,14 +1,11 @@ """Database configuration and session management.""" -import logging import os from urllib.parse import quote_plus from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine from sqlalchemy.orm import declarative_base -logger = logging.getLogger(__name__) - def get_database_url() -> str: """ From db3dce8c1bae6194fc2b8cc8cccf0edc72a7a80c Mon Sep 17 00:00:00 2001 From: Spark Zou Date: Sun, 1 Feb 2026 07:52:21 +0900 Subject: [PATCH 04/10] fix: address code review feedback - Normalize PostgreSQL URL schemes in app/database.py and alembic/env.py - Handle postgres://, postgresql://, postgresql+asyncpg:// formats - Convert all to postgresql+psycopg:// for consistency - Add URL encoding for credentials in alembic/env.py - Update alembic.ini comments to reflect actual implementation - Add backward compatibility for MEMU_STORAGE_DIR env var in app/main.py --- alembic.ini | 16 ++++++++-------- alembic/env.py | 12 +++++++++++- app/database.py | 8 ++++++++ app/main.py | 3 ++- 4 files changed, 29 insertions(+), 10 deletions(-) diff --git a/alembic.ini b/alembic.ini index 0f3e809..1958d0b 100644 --- a/alembic.ini +++ b/alembic.ini @@ -57,17 +57,17 @@ version_path_separator = os # Use os.pathsep. Default configuration used for ne # are written from script.py.mako # output_encoding = utf-8 -# Example: configure the SQLAlchemy database URL. +# Database URL Configuration # -# Note: Alembic's ini file does NOT support shell-style ${VAR} interpolation. -# TODO: Implement environment variable support in alembic/env.py to read DATABASE_URL -# If you want to use environment variables, you have two common options: +# Note: This project reads the database URL from environment variables in alembic/env.py. +# The sqlalchemy.url setting is intentionally left unset here. # -# 1) Use ConfigParser-style interpolation (requires setting these in env.py): -# sqlalchemy.url = postgresql+psycopg://%(DATABASE_USER)s:%(DATABASE_PASSWORD)s@%(DATABASE_HOST)s:%(DATABASE_PORT)s/%(DATABASE_NAME)s +# Supported environment variables (in priority order): +# 1) DATABASE_URL - Full connection URL (e.g., postgresql+psycopg://user:pass@host:5432/db) +# 2) Individual variables: DATABASE_HOST, DATABASE_PORT, DATABASE_USER, DATABASE_PASSWORD, DATABASE_NAME # -# 2) Omit sqlalchemy.url here and instead set the SQLALCHEMY_URL environment -# variable or pass the URL on the Alembic command line. +# The env.py automatically normalizes URL schemes (postgres://, postgresql://, postgresql+asyncpg://) +# to postgresql+psycopg:// for Alembic migrations. # # sqlalchemy.url = diff --git a/alembic/env.py b/alembic/env.py index 8cac7f0..49916c0 100644 --- a/alembic/env.py +++ b/alembic/env.py @@ -24,10 +24,20 @@ def get_sync_database_url() -> str: # First try DATABASE_URL from environment database_url = os.getenv("DATABASE_URL") if database_url: + # Normalize common PostgreSQL DSNs to use the psycopg (sync) driver. + # Handle bare postgres:// and postgresql:// URLs that don't specify a driver. + if database_url.startswith("postgres://"): + # postgres://user:pass@host/db -> postgresql+psycopg://user:pass@host/db + database_url = "postgresql+psycopg://" + database_url[len("postgres://") :] + elif database_url.startswith("postgresql://") and not database_url.startswith("postgresql+"): + # postgresql://user:pass@host/db -> postgresql+psycopg://user:pass@host/db + database_url = "postgresql+psycopg://" + database_url[len("postgresql://") :] + # Convert async driver to sync driver if needed # postgresql+asyncpg:// -> postgresql+psycopg:// if "+asyncpg" in database_url: - database_url = database_url.replace("+asyncpg", "+psycopg") + database_url = database_url.replace("+asyncpg", "+psycopg", 1) + return database_url # Construct from individual variables diff --git a/app/database.py b/app/database.py index acbe799..3b745a1 100644 --- a/app/database.py +++ b/app/database.py @@ -21,6 +21,14 @@ def get_database_url() -> str: """ database_url = os.getenv("DATABASE_URL") if database_url: + # Normalize common PostgreSQL DSNs to use the psycopg async driver. + # Handle bare postgres:// and postgresql:// URLs that don't specify a driver. + if database_url.startswith("postgres://"): + # postgres://user:pass@host/db -> postgresql+psycopg://user:pass@host/db + database_url = "postgresql+psycopg://" + database_url[len("postgres://") :] + elif database_url.startswith("postgresql://") and not database_url.startswith("postgresql+"): + # postgresql://user:pass@host/db -> postgresql+psycopg://user:pass@host/db + database_url = "postgresql+psycopg://" + database_url[len("postgresql://") :] return database_url # Construct from individual variables diff --git a/app/main.py b/app/main.py index 519e4a9..b59db47 100644 --- a/app/main.py +++ b/app/main.py @@ -37,7 +37,8 @@ ) # Storage directory for conversation files -storage_dir = Path(os.getenv("STORAGE_PATH", "./data")) +# Support both new STORAGE_PATH and legacy MEMU_STORAGE_DIR for backward compatibility +storage_dir = Path(os.getenv("STORAGE_PATH") or os.getenv("MEMU_STORAGE_DIR") or "./data") storage_dir.mkdir(parents=True, exist_ok=True) From 5f5d005f9d0230c3b333a1aa212ec5518bf72312 Mon Sep 17 00:00:00 2001 From: Spark Zou Date: Sun, 1 Feb 2026 07:57:43 +0900 Subject: [PATCH 05/10] chore: add .gitkeep to preserve alembic/versions directory --- alembic/versions/.gitkeep | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 alembic/versions/.gitkeep diff --git a/alembic/versions/.gitkeep b/alembic/versions/.gitkeep new file mode 100644 index 0000000..e69de29 From 481a4cc2472430b06d6de9f776a6b4f3ae6f847d Mon Sep 17 00:00:00 2001 From: Spark Zou Date: Sun, 1 Feb 2026 08:22:53 +0900 Subject: [PATCH 06/10] fix: improve URL encoding and asyncpg conversion - Use quote(..., safe='') instead of quote_plus() for URL userinfo encoding (quote_plus encodes spaces as '+' which is incorrect for URL userinfo) - Add asyncpg to psycopg conversion in app/database.py (consistent with alembic/env.py, asyncpg is not a project dependency) --- alembic/env.py | 7 ++++--- app/database.py | 13 ++++++++++--- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/alembic/env.py b/alembic/env.py index 49916c0..0889706 100644 --- a/alembic/env.py +++ b/alembic/env.py @@ -4,7 +4,7 @@ # pylint: disable=no-member from logging.config import fileConfig -from urllib.parse import quote_plus +from urllib.parse import quote from sqlalchemy import pool @@ -48,8 +48,9 @@ def get_sync_database_url() -> str: db_name = os.getenv("DATABASE_NAME", "memu") # URL-encode username and password to handle special characters - db_user_encoded = quote_plus(db_user) - db_pass_encoded = quote_plus(db_pass) + # Use quote(..., safe="") instead of quote_plus() for URL userinfo section + db_user_encoded = quote(db_user, safe="") + db_pass_encoded = quote(db_pass, safe="") # Use psycopg (sync) for Alembic migrations return f"postgresql+psycopg://{db_user_encoded}:{db_pass_encoded}@{db_host}:{db_port}/{db_name}" diff --git a/app/database.py b/app/database.py index 3b745a1..6168231 100644 --- a/app/database.py +++ b/app/database.py @@ -1,7 +1,7 @@ """Database configuration and session management.""" import os -from urllib.parse import quote_plus +from urllib.parse import quote from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine from sqlalchemy.orm import declarative_base @@ -29,6 +29,12 @@ def get_database_url() -> str: elif database_url.startswith("postgresql://") and not database_url.startswith("postgresql+"): # postgresql://user:pass@host/db -> postgresql+psycopg://user:pass@host/db database_url = "postgresql+psycopg://" + database_url[len("postgresql://") :] + + # Convert asyncpg driver to psycopg if needed (asyncpg is not a project dependency) + # postgresql+asyncpg:// -> postgresql+psycopg:// + if "+asyncpg" in database_url: + database_url = database_url.replace("+asyncpg", "+psycopg", 1) + return database_url # Construct from individual variables @@ -64,8 +70,9 @@ def get_database_url() -> str: assert db_name is not None # URL-encode username and password to handle special characters like '@', ':', '/' - db_user_encoded = quote_plus(db_user) - db_pass_encoded = quote_plus(db_pass) + # Use quote(..., safe="") instead of quote_plus() for URL userinfo section + db_user_encoded = quote(db_user, safe="") + db_pass_encoded = quote(db_pass, safe="") return f"postgresql+psycopg://{db_user_encoded}:{db_pass_encoded}@{db_host}:{db_port}/{db_name}" From 686972a59da43552fc6306ed2a8a9d2d0ea69f6a Mon Sep 17 00:00:00 2001 From: Spark Zou Date: Sun, 1 Feb 2026 08:47:26 +0900 Subject: [PATCH 07/10] refactor: extract Base to side-effect-free module for Alembic autogenerate - Create app/models/base.py with Base class (no DB connection on import) - Update app/database.py to import Base from app.models.base - Update alembic/env.py to set target_metadata = Base.metadata - This enables 'alembic revision --autogenerate' to detect model changes --- alembic/env.py | 18 ++++++------------ app/database.py | 7 ++++--- app/models/base.py | 14 ++++++++++++++ 3 files changed, 24 insertions(+), 15 deletions(-) create mode 100644 app/models/base.py diff --git a/alembic/env.py b/alembic/env.py index 0889706..a304b93 100644 --- a/alembic/env.py +++ b/alembic/env.py @@ -10,6 +10,11 @@ from alembic import context +# Import Base metadata for autogenerate support +# Note: We import from app.models.base which is side-effect free +# (doesn't create database connections or read environment variables) +from app.models.base import Base + def get_sync_database_url() -> str: """ @@ -65,18 +70,7 @@ def get_sync_database_url() -> str: if config.config_file_name is not None: fileConfig(config.config_file_name) -# add your model's MetaData object here -# for 'autogenerate' support -# Note: Import models in env.py only when needed to avoid side effects -# -# TODO: Import your SQLAlchemy Base metadata here for autogenerate support. -# Since this PR adds database.py with a Base class, you should import it: -# -# from app.database import Base -# target_metadata = Base.metadata -# -# Without this, Alembic cannot auto-generate migrations from your models. -target_metadata = None +target_metadata = Base.metadata # other values from the config, defined by the needs of env.py, # can be acquired: diff --git a/app/database.py b/app/database.py index 6168231..fbb8181 100644 --- a/app/database.py +++ b/app/database.py @@ -4,7 +4,8 @@ from urllib.parse import quote from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine -from sqlalchemy.orm import declarative_base + +from app.models.base import Base def get_database_url() -> str: @@ -96,8 +97,8 @@ def get_database_url() -> str: expire_on_commit=False, bind=engine, ) -# Base class for models -Base = declarative_base() +# Re-export Base for backward compatibility +__all__ = ["Base", "SessionLocal", "engine", "get_db", "get_database_url"] async def get_db(): diff --git a/app/models/base.py b/app/models/base.py new file mode 100644 index 0000000..1e0e9b3 --- /dev/null +++ b/app/models/base.py @@ -0,0 +1,14 @@ +"""SQLAlchemy Base class for model definitions. + +This module is intentionally side-effect free - it only defines the Base class +without creating any database connections or reading environment variables. +This allows safe imports from alembic/env.py for migration autogeneration. +""" + +from sqlalchemy.orm import DeclarativeBase + + +class Base(DeclarativeBase): + """Base class for all SQLAlchemy models.""" + + pass From bf15ed82824ac291e7cc8228ecdaf867d889a973 Mon Sep 17 00:00:00 2001 From: Spark Zou Date: Sun, 1 Feb 2026 14:55:52 +0900 Subject: [PATCH 08/10] fix: remove unsupported autocommit param from async_sessionmaker SQLAlchemy 2.x async_sessionmaker does not support the autocommit argument. Use explicit transaction scopes instead. --- app/database.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/database.py b/app/database.py index fbb8181..98c4791 100644 --- a/app/database.py +++ b/app/database.py @@ -91,8 +91,8 @@ def get_database_url() -> str: max_overflow=20, ) # Async session factory +# Note: autocommit is removed as it's not supported in SQLAlchemy 2.x async_sessionmaker SessionLocal: async_sessionmaker[AsyncSession] = async_sessionmaker( - autocommit=False, autoflush=False, expire_on_commit=False, bind=engine, From b5c01550ac08f7bd84d50a861ea073142727cc71 Mon Sep 17 00:00:00 2001 From: Spark Zou Date: Sun, 1 Feb 2026 15:16:38 +0900 Subject: [PATCH 09/10] fix: use startswith() for safer asyncpg URL detection Replace 'in' operator with startswith() to avoid false positives when checking for asyncpg driver in database URLs. This ensures only URLs that actually start with postgresql+asyncpg:// are converted. --- alembic.ini | 2 +- alembic/env.py | 4 ++-- app/database.py | 4 ++-- app/models/base.py | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/alembic.ini b/alembic.ini index 1958d0b..db5a06f 100644 --- a/alembic.ini +++ b/alembic.ini @@ -15,7 +15,7 @@ prepend_sys_path = . # timezone to use when rendering the date within the migration file # as well as the filename. # If specified, requires the python>=3.9 or backports.zoneinfo library. -# Any required deps can installed by adding `alembic[tz]` to the pip requirements +# Any required deps can be installed by adding `alembic[tz]` to the pip requirements # string value is passed to ZoneInfo() # leave blank for localtime # timezone = diff --git a/alembic/env.py b/alembic/env.py index a304b93..8052ea3 100644 --- a/alembic/env.py +++ b/alembic/env.py @@ -40,8 +40,8 @@ def get_sync_database_url() -> str: # Convert async driver to sync driver if needed # postgresql+asyncpg:// -> postgresql+psycopg:// - if "+asyncpg" in database_url: - database_url = database_url.replace("+asyncpg", "+psycopg", 1) + if database_url.startswith("postgresql+asyncpg://"): + database_url = "postgresql+psycopg://" + database_url[len("postgresql+asyncpg://") :] return database_url diff --git a/app/database.py b/app/database.py index 98c4791..ace0607 100644 --- a/app/database.py +++ b/app/database.py @@ -33,8 +33,8 @@ def get_database_url() -> str: # Convert asyncpg driver to psycopg if needed (asyncpg is not a project dependency) # postgresql+asyncpg:// -> postgresql+psycopg:// - if "+asyncpg" in database_url: - database_url = database_url.replace("+asyncpg", "+psycopg", 1) + if database_url.startswith("postgresql+asyncpg://"): + database_url = "postgresql+psycopg://" + database_url[len("postgresql+asyncpg://") :] return database_url diff --git a/app/models/base.py b/app/models/base.py index 1e0e9b3..a0916be 100644 --- a/app/models/base.py +++ b/app/models/base.py @@ -1,6 +1,6 @@ """SQLAlchemy Base class for model definitions. -This module is intentionally side-effect free - it only defines the Base class +This module is intentionally side-effect-free - it only defines the Base class without creating any database connections or reading environment variables. This allows safe imports from alembic/env.py for migration autogeneration. """ From d858966ae5ea34a18d9e72e9f3ecf3334b182c31 Mon Sep 17 00:00:00 2001 From: Spark Zou Date: Sun, 1 Feb 2026 16:15:43 +0900 Subject: [PATCH 10/10] fix: align alembic/env.py with app/database.py for env var validation Remove hard-coded defaults from get_sync_database_url() and add explicit validation for required environment variables. This ensures consistent behavior between Alembic migrations and the application, preventing accidental connections to unintended databases. --- alembic/env.py | 33 ++++++++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/alembic/env.py b/alembic/env.py index 8052ea3..67b02d5 100644 --- a/alembic/env.py +++ b/alembic/env.py @@ -46,11 +46,34 @@ def get_sync_database_url() -> str: return database_url # Construct from individual variables - db_host = os.getenv("DATABASE_HOST", "localhost") - db_port = os.getenv("DATABASE_PORT", "5432") - db_user = os.getenv("DATABASE_USER", "postgres") - db_pass = os.getenv("DATABASE_PASSWORD", "postgres") - db_name = os.getenv("DATABASE_NAME", "memu") + db_host = os.getenv("DATABASE_HOST") + db_port = os.getenv("DATABASE_PORT", "5432") # Default PostgreSQL port + db_user = os.getenv("DATABASE_USER") + db_pass = os.getenv("DATABASE_PASSWORD") + db_name = os.getenv("DATABASE_NAME") + + # Validate required environment variables (consistent with app/database.py) + missing_vars = [ + name + for name, value in [ + ("DATABASE_HOST", db_host), + ("DATABASE_USER", db_user), + ("DATABASE_PASSWORD", db_pass), + ("DATABASE_NAME", db_name), + ] + if not value + ] + + if missing_vars: + raise RuntimeError( + f"Database configuration is incomplete. Missing environment variables: {', '.join(missing_vars)}" + ) + + # At this point, we know these are not None + assert db_host is not None + assert db_user is not None + assert db_pass is not None + assert db_name is not None # URL-encode username and password to handle special characters # Use quote(..., safe="") instead of quote_plus() for URL userinfo section