@router.get("/connection-url/{tenant_id}")
async def get_tenant_connection_url(
    tenant_id: UUID,
    include_public: bool = False,
    # admin=Depends(get_current_admin),
) -> dict:
    """
    Get a PostgreSQL connection URL for a specific tenant.

    This URL is scoped to only show the tenant's generated tables.

    Query params:
        include_public: If true, also include public schema (for shared tables)

    Example:
        GET /migrations/connection-url/{tenant_id}
        GET /migrations/connection-url/{tenant_id}?include_public=true
    """
    # NOTE(review): the admin dependency in the signature is commented out, so
    # nothing visible here restricts who may call this endpoint, and the
    # response carries a full database connection URL. Confirm access control
    # is enforced elsewhere (router-level dependency, gateway) before shipping.

    # Imported lazily and aliased: the helper shares this handler's name, and
    # the alias keeps the two apart when reading the body.
    from app.utils.tenant_connection import get_schema_name
    from app.utils.tenant_connection import (
        get_tenant_connection_url as build_connection_url,
    )

    try:
        connection_url = build_connection_url(tenant_id, include_public)
        schema_name = get_schema_name(tenant_id)
    except Exception as e:
        # Same error contract as the other routes in this module: any failure
        # surfaces as a 500 with the exception text as the detail.
        raise HTTPException(status_code=500, detail=str(e)) from e

    return {
        "tenant_id": str(tenant_id),
        "schema_name": schema_name,
        "connection_url": connection_url,
        "includes_public_schema": include_public,
        "note": "Use this URL to connect and see only this tenant's generated tables",
    }
def _get_schema_name(tenant_id) -> str:
    """
    Build the per-tenant schema name from a tenant id.

    The id is stringified and every hyphen is replaced with an underscore,
    then the result is prefixed with ``tenant_``.

    Example: tenant_7b21599b_3518_401e_a70a_5fe28d4000e3
    """
    suffix = str(tenant_id).replace("-", "_")
    return "tenant_" + suffix
gen_random_uuid(), + tenant_id UUID NOT NULL, + data JSONB NOT NULL, + created_at TIMESTAMPTZ DEFAULT NOW() +); """.strip() new_migrations.append( @@ -67,51 +100,56 @@ def create_migrations( existing_names.add(mig_name) next_seq += 1 - # 2) Relationship-based migrations (FKs / join tables) + # ===== STEP 3: CREATE RELATIONSHIPS (in tenant schema) ===== for rel in relationships: from_table = _table_name_for_classification(rel.from_classification) to_table = _table_name_for_classification(rel.to_classification) + qualified_from = f"{schema_name}.{from_table}" + qualified_to = f"{schema_name}.{to_table}" + # Support both Enum and plain string for rel.type rel_type = getattr(rel.type, "value", rel.type) - mig_name = f"rel_{rel_type.lower()}_{from_table}_{to_table}" + mig_name = f"rel_{rel_type.lower()}_{schema_name}_{from_table}_{to_table}" if mig_name in existing_names: continue if rel_type == "ONE_TO_MANY": sql = f""" - ALTER TABLE {from_table} - ADD COLUMN IF NOT EXISTS {to_table}_id UUID, - ADD CONSTRAINT fk_{from_table}_{to_table} - FOREIGN KEY ({to_table}_id) - REFERENCES {to_table}(id); - """.strip() + ALTER TABLE {qualified_from} + ADD COLUMN IF NOT EXISTS {to_table}_id UUID, + ADD CONSTRAINT fk_{schema_name}_{from_table}_{to_table} + FOREIGN KEY ({to_table}_id) + REFERENCES {qualified_to}(id); + """.strip() elif rel_type == "ONE_TO_ONE": sql = f""" - ALTER TABLE {from_table} - ADD COLUMN IF NOT EXISTS {to_table}_id UUID UNIQUE, - ADD CONSTRAINT fk_{from_table}_{to_table} - FOREIGN KEY ({to_table}_id) - REFERENCES {to_table}(id); - """.strip() + ALTER TABLE {qualified_from} + ADD COLUMN IF NOT EXISTS {to_table}_id UUID UNIQUE, + ADD CONSTRAINT fk_{schema_name}_{from_table}_{to_table} + FOREIGN KEY ({to_table}_id) + REFERENCES {qualified_to}(id); + """.strip() elif rel_type == "MANY_TO_MANY": join_table = f"{from_table}_{to_table}_join" + qualified_join = f"{schema_name}.{join_table}" + sql = f""" - CREATE TABLE IF NOT EXISTS {join_table} ( - id UUID PRIMARY 
# app/utils/tenant_connection.py

import os
from urllib.parse import quote
from uuid import UUID


def get_schema_name(tenant_id: UUID) -> str:
    """
    Generate schema name from tenant_id.

    Example:
        tenant_id: 7b21599b-3518-401e-a70a-5fe28d4000e3
        returns:   tenant_7b21599b_3518_401e_a70a_5fe28d4000e3
    """
    return f"tenant_{str(tenant_id).replace('-', '_')}"


def get_tenant_connection_url(tenant_id: UUID, include_public: bool = False) -> str:
    """
    Generate a PostgreSQL connection URL scoped to a specific tenant's schema.

    The base URL is read from the ``DATABASE_URL`` environment variable (with a
    local-development fallback) and an ``options`` query parameter is appended
    that sets ``search_path`` for the session.

    Args:
        tenant_id: The tenant's UUID.
        include_public: If True, also include the ``public`` schema in
            ``search_path`` after the tenant schema.

    Returns:
        The base URL with ``options=-c search_path=...`` appended, fully
        percent-encoded.

    Example output:
        postgresql://postgres:postgres@localhost:54322/postgres?options=-c%20search_path%3Dtenant_abc123
    """
    # Fallback targets a local database; real deployments are expected to set
    # DATABASE_URL explicitly.
    database_url = os.getenv(
        "DATABASE_URL",
        "postgresql://postgres:postgres@localhost:54322/postgres",
    )

    schema_name = get_schema_name(tenant_id)

    # Tenant schema first; ``public`` is only a fallback when requested.
    search_path = f"{schema_name},public" if include_public else schema_name

    # ``-c name=value`` sets a run-time configuration parameter for the session.
    options = f"-c search_path={search_path}"

    # Percent-encode everything that is not an unreserved character. The "="
    # in particular must become %3D: it sits inside the *value* of the
    # ``options`` query parameter, and a raw second "=" in a parameter is
    # rejected by libpq's URI parser.
    encoded_options = quote(options, safe="")

    # Extend an existing query string if there is one, otherwise start one.
    separator = "&" if "?" in database_url else "?"
    return f"{database_url}{separator}options={encoded_options}"