Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 18 additions & 24 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
@@ -1,35 +1,29 @@
# CI workflow: run the pytest suite on every pull request targeting main.
name: Tests

on:
  pull_request:
    branches: [ main ]

jobs:
  test:
    name: Run tests
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # Single version today; matrix form makes adding versions trivial.
        python-version: ["3.11"]

    steps:
      - uses: actions/checkout@v4

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -e ".[dev]"

      - name: Run tests
        run: |
          pytest -v
1 change: 1 addition & 0 deletions .python-version
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
3.11.14
116 changes: 43 additions & 73 deletions app/routers/domain.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from pydantic import BaseModel
from typing import Optional
from app.services.cache import WhoisCache
from app.services.whois import WhoisService
from app.services.whois import WhoisService, parse_whois
from app.services.rate_limiter import RateLimiter
import logging

Expand Down Expand Up @@ -46,80 +46,33 @@ async def get_whois(
tld = parts[-1]

# 2. Cache
def parse_whois(raw: str, tld: str):
    """Parse a raw WHOIS response into a small set of normalized fields.

    Heuristic parser: common WHOIS labels are matched case-insensitively,
    so it works across registries rather than for one specific TLD.

    Args:
        raw: Raw WHOIS response text (may be empty or None).
        tld: TLD of the queried domain. Currently unused; kept for
            interface compatibility and future TLD-specific handling.

    Returns:
        Dict with keys 'statut', 'creation_date', 'registrar',
        'pendingDelete' and 'redemptionPeriod'.
    """
    import re

    result = {
        "statut": None,
        "creation_date": None,
        "registrar": None,
        "pendingDelete": False,
        "redemptionPeriod": False,
    }
    if not raw:
        return result

    creation_markers = (
        "creation date", "created on", "created:", "creation:", "registered on",
    )

    for line in (candidate.strip() for candidate in raw.splitlines()):
        if not line:
            continue
        lowered = line.lower()

        # Registrar — first plain "Registrar:" line, skipping the related
        # "Registrar WHOIS Server" / "Registrar URL" labels.
        if (result["registrar"] is None
                and lowered.startswith("registrar:")
                and "whois server" not in lowered
                and "url" not in lowered):
            _, sep, value = line.partition(":")
            if sep:
                result["registrar"] = value.strip()
            continue

        # Creation date — first line carrying any known creation label.
        if result["creation_date"] is None and any(m in lowered for m in creation_markers):
            _, sep, value = line.partition(":")
            if sep:
                result["creation_date"] = value.strip()
            continue

        # Status — first status line wins for 'statut'; the delete/redemption
        # flags are accumulated across every status line.
        if "status:" in lowered or lowered.startswith("domain status"):
            if result["statut"] is None:
                _, sep, value = line.partition(":")
                if sep:
                    result["statut"] = value.strip()
            if "pendingdelete" in lowered:
                result["pendingDelete"] = True
            if "redemptionperiod" in lowered:
                result["redemptionPeriod"] = True
            continue

    # Fallback for registries that print "Registrar Name" without a colon.
    if result["registrar"] is None:
        match = re.search(r"registrar\s+([\w\-\. ]{3,})", raw, re.IGNORECASE)
        if match:
            result["registrar"] = match.group(1).strip()

    return result
# parser is provided by app.services.whois.parse_whois

if force != 1:
cached_data = cache.get(domain)
if cached_data:
# enrich from raw before removing it
parsed = parse_whois(cached_data.get("raw"), tld)
# ne pas exposer le champ raw dans la réponse JSON
# Prefer parsed fields persisted in DB. Only fallback to parsing raw if fields are missing.
parsed = {
"statut": cached_data.get("statut"),
"creation_date": cached_data.get("creation_date"),
"registrar": cached_data.get("registrar"),
"pendingDelete": cached_data.get("pendingDelete"),
"redemptionPeriod": cached_data.get("redemptionPeriod"),
}
# If any key is missing/None, parse raw as fallback
if not any(v is not None for v in parsed.values()):
parsed = parse_whois(cached_data.get("raw"), tld)
else:
# ensure booleans normalized (could be stored as 0/1)
try:
parsed["pendingDelete"] = bool(int(parsed["pendingDelete"])) if parsed["pendingDelete"] is not None else False
except Exception:
parsed["pendingDelete"] = bool(parsed.get("pendingDelete"))
try:
parsed["redemptionPeriod"] = bool(int(parsed["redemptionPeriod"])) if parsed["redemptionPeriod"] is not None else False
except Exception:
parsed["redemptionPeriod"] = bool(parsed.get("redemptionPeriod"))
# do not expose raw in responses
cached_data.pop("raw", None)
# inject parsed fields so response_model includes them
cached_data.update(parsed)
Expand Down Expand Up @@ -158,8 +111,25 @@ def parse_whois(raw: str, tld: str):
cached_data = cache.get(domain)
if not cached_data:
raise HTTPException(status_code=500, detail="Failed to retrieve data from cache after save")
# enrich from raw before removing it (comme pour le cache hit)
parsed = parse_whois(cached_data.get("raw"), tld)
# Prefer parsed fields persisted in DB. Only fallback to parsing raw if fields are missing.
parsed = {
"statut": cached_data.get("statut"),
"creation_date": cached_data.get("creation_date"),
"registrar": cached_data.get("registrar"),
"pendingDelete": cached_data.get("pendingDelete"),
"redemptionPeriod": cached_data.get("redemptionPeriod"),
}
if not any(v is not None for v in parsed.values()):
parsed = parse_whois(cached_data.get("raw"), tld)
else:
try:
parsed["pendingDelete"] = bool(int(parsed["pendingDelete"])) if parsed["pendingDelete"] is not None else False
except Exception:
parsed["pendingDelete"] = bool(parsed.get("pendingDelete"))
try:
parsed["redemptionPeriod"] = bool(int(parsed["redemptionPeriod"])) if parsed["redemptionPeriod"] is not None else False
except Exception:
parsed["redemptionPeriod"] = bool(parsed.get("redemptionPeriod"))
cached_data.pop("raw", None)
cached_data.update(parsed)
# ensure coherence: if pendingDelete or redemptionPeriod, available must be False
Expand Down
109 changes: 103 additions & 6 deletions app/services/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,20 +7,32 @@
logger = logging.getLogger(__name__)

class WhoisCache:
    """SQLite-backed cache of WHOIS lookups, persisting parsed fields."""

    def __init__(self, db_path: str = None):
        """Open (or create) the cache database and run migrations.

        Args:
            db_path: Optional path to the SQLite file; defaults to
                ``data/whois_cache.db``. Overridable for tests or
                alternate deployments.
        """
        self.db_path = db_path or "data/whois_cache.db"
        # `or "."` guards against a bare filename whose dirname is "",
        # which would make os.makedirs raise.
        os.makedirs(os.path.dirname(self.db_path) or ".", exist_ok=True)
        self._init_db()
        # Run lightweight migrations/backfill if needed (idempotent, so it
        # is safe to call on every start). Failure is logged, not fatal.
        try:
            self._migrate_if_needed()
        except Exception:
            logger.exception("Migration failed during cache init. Continuing without migration.")

def _init_db(self):
    """Create the cache table if it does not exist.

    The schema includes the parsed WHOIS columns; databases created by
    older versions get the missing columns added by _migrate_if_needed().
    """
    with sqlite3.connect(self.db_path) as conn:
        conn.execute("""
            CREATE TABLE IF NOT EXISTS whois_cache (
                domain TEXT PRIMARY KEY,
                tld TEXT,
                available BOOLEAN,
                checked_at TEXT,
                raw TEXT,
                statut TEXT,
                creation_date TEXT,
                registrar TEXT,
                pendingDelete BOOLEAN,
                redemptionPeriod BOOLEAN
            )
        """)

Expand All @@ -39,15 +51,100 @@ def get(self, domain: str) -> Optional[Dict[str, Any]]:
logger.error(f"Cache error on get({domain}): {e}")
return None
return None
def _ensure_bool(self, val):
    """Normalize a SQLite-stored truthy value to a Python bool.

    SQLite stores booleans as 0/1 (or NULL). NULL maps to False;
    numeric-like values (including "0"/"1" strings) are coerced via
    int(); anything non-numeric falls back to plain truthiness.
    """
    if val is None:
        return False
    try:
        coerced = int(val)
    except Exception:
        return bool(val)
    return bool(coerced)

def set(self, domain: str, tld: str, available: bool, raw: str):
    """Insert or refresh the cache row for *domain*.

    Persists the raw WHOIS text together with fields parsed from it, so
    readers can use stored columns instead of re-parsing raw each time.

    Args:
        domain: Fully-qualified domain name (table primary key).
        tld: The domain's top-level domain.
        available: Whether the domain appears available.
        raw: Raw WHOIS response text.
    """
    checked_at = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
    # Parse raw to extract the fields to persist. A parser failure must
    # never prevent caching the raw response, so fall back to empty fields.
    try:
        from app.services.whois import parse_whois
        parsed = parse_whois(raw, tld)
    except Exception:
        parsed = {"statut": None, "creation_date": None, "registrar": None, "pendingDelete": False, "redemptionPeriod": False}

    try:
        with sqlite3.connect(self.db_path) as conn:
            conn.execute("""
                INSERT OR REPLACE INTO whois_cache
                (domain, tld, available, checked_at, raw, statut, creation_date, registrar, pendingDelete, redemptionPeriod)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                """,
                (
                    domain,
                    tld,
                    # Store booleans as 0/1 explicitly for SQLite.
                    int(bool(available)),
                    checked_at,
                    raw,
                    parsed.get("statut"),
                    parsed.get("creation_date"),
                    parsed.get("registrar"),
                    int(bool(parsed.get("pendingDelete"))),
                    int(bool(parsed.get("redemptionPeriod"))),
                ))
        logger.debug(f"Cache SET for domain: {domain} (checked_at: {checked_at})")
    except sqlite3.Error as e:
        logger.error(f"Cache error on set({domain}): {e}")
def _migrate_if_needed(self):
    """Add any missing parsed-field columns and backfill them from raw.

    Idempotent: columns are only added when absent, and backfill only
    touches rows whose parsed columns are still NULL. SQLite errors are
    logged rather than raised so startup never fails on migration.
    """
    expected_columns = {
        "statut": "TEXT",
        "creation_date": "TEXT",
        "registrar": "TEXT",
        "pendingDelete": "BOOLEAN",
        "redemptionPeriod": "BOOLEAN",
    }

    try:
        with sqlite3.connect(self.db_path) as conn:
            table_info = conn.execute("PRAGMA table_info('whois_cache')").fetchall()
            present = {row[1] for row in table_info}  # row[1] is the column name
            missing = [(name, sql_type) for name, sql_type in expected_columns.items() if name not in present]
            if missing:
                logger.info(f"Cache migration: adding columns: {[n for n, _ in missing]}")
                for name, sql_type in missing:
                    try:
                        conn.execute(f"ALTER TABLE whois_cache ADD COLUMN {name} {sql_type}")
                    except sqlite3.Error:
                        logger.exception(f"Failed to add column {name}; continuing")
                conn.commit()

            # Backfill parsed fields where raw is present but any parsed column is NULL.
            sel = "SELECT domain, raw, tld FROM whois_cache WHERE raw IS NOT NULL AND (statut IS NULL OR creation_date IS NULL OR registrar IS NULL OR pendingDelete IS NULL OR redemptionPeriod IS NULL)"
            stale_rows = conn.execute(sel).fetchall()
            if stale_rows:
                logger.info(f"Cache migration: backfilling parsed fields for {len(stale_rows)} rows")
                # Import the parser locally to avoid circular-import issues.
                try:
                    from app.services.whois import parse_whois
                except Exception:
                    logger.exception("Could not import parse_whois for migration; skipping backfill")
                    return

                upd = "UPDATE whois_cache SET statut = ?, creation_date = ?, registrar = ?, pendingDelete = ?, redemptionPeriod = ? WHERE domain = ?"
                backfilled = 0
                for domain, raw, tld in stale_rows:
                    try:
                        fields = parse_whois(raw, tld)
                        conn.execute(upd, (
                            fields.get("statut"),
                            fields.get("creation_date"),
                            fields.get("registrar"),
                            int(bool(fields.get("pendingDelete"))),
                            int(bool(fields.get("redemptionPeriod"))),
                            domain,
                        ))
                        backfilled += 1
                    except Exception:
                        logger.exception(f"Failed to backfill domain {domain}; skipping")
                if backfilled:
                    conn.commit()
                    logger.info(f"Cache migration: backfilled {backfilled} rows")
    except sqlite3.Error:
        logger.exception("SQLite error during cache migration")
Loading