diff --git a/.github/workflows/build_dev.yml b/.github/workflows/build_dev.yml new file mode 100644 index 0000000..9d39fab --- /dev/null +++ b/.github/workflows/build_dev.yml @@ -0,0 +1,33 @@ +name: Build and Push Docker Images + +on: + push: + branches: + - 'dev' + workflow_dispatch: + +jobs: + api: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v2 + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Login to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + + - name: Build and push + uses: docker/build-push-action@v5 + with: + push: true + context: ./backend + tags: erasme/ai-proxy:${{ github.ref == 'refs/heads/main' && 'latest' || 'dev' }} \ No newline at end of file diff --git a/.github/workflows/build_main.yml b/.github/workflows/build_main.yml new file mode 100644 index 0000000..af9efa5 --- /dev/null +++ b/.github/workflows/build_main.yml @@ -0,0 +1,59 @@ +name: Update Version and build main branch + +on: + pull_request: + types: + - closed + branches: + - main # Fires whenever a PR targeting 'main' is closed (merged or not); the per-step "merged == true" checks below gate the actual work. 
+ +permissions: + contents: write + +jobs: + create_tag: + runs-on: ubuntu-latest + steps: + ### Step 1: Check out the repository + - name: Checkout Code + if: ${{ github.event.pull_request.merged == true }} + uses: actions/checkout@v3 + + ### Step 2: Configure Git + - name: Configure Git + if: ${{ github.event.pull_request.merged == true }} + run: | + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + + ### Step 3: Bump the Version and Create Tag + - name: Bump Version and Create Tag + if: ${{ github.event.pull_request.merged == true }} + id: version + uses: anothrNick/github-tag-action@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + INITIAL_VERSION: 0.1.0 + DEFAULT_BUMP: patch + + - name: Checkout + uses: actions/checkout@v2 + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Login to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + + - name: Build and push + uses: docker/build-push-action@v5 + with: + push: true + context: ./backend + tags: erasme/ai-proxy:latest , erasme/ai-proxy:${{ steps.version.outputs.new_tag }} \ No newline at end of file diff --git a/.gitignore b/.gitignore index 4d69dff..0e9aced 100644 --- a/.gitignore +++ b/.gitignore @@ -4,4 +4,5 @@ config.yaml *.db emissions.csv backend/.~lock.emissions.csv# -data/ \ No newline at end of file +data/ +venv \ No newline at end of file diff --git a/backend/exporter.py b/backend/exporter.py deleted file mode 100644 index 8b25091..0000000 --- a/backend/exporter.py +++ /dev/null @@ -1,211 +0,0 @@ -from prometheus_client import start_http_server, REGISTRY -from prometheus_client.core import GaugeMetricFamily, CounterMetricFamily -from prometheus_client.registry import Collector -import sqlite3 -import time -from typing import Dict, List, Tuple - -class 
SQLiteCollector(Collector): - def __init__(self, db_path: str): - self.db_path = db_path - - def get_db_connection(self): - return sqlite3.connect(self.db_path) - - def collect(self): - conn = self.get_db_connection() - cursor = conn.cursor() - - try: - # Export cumulative counters (these will be scraped regularly by Prometheus) - yield from self._get_counter_metrics(cursor) - - # Export current state gauges - yield from self._get_gauge_metrics(cursor) - - finally: - conn.close() - - def _get_counter_metrics(self, cursor): - """Export cumulative counters - Prometheus will calculate rates from these""" - - # Total requests by model and user (Counter) - cursor.execute(""" - SELECT model_name, user_name, COUNT(*) as total - FROM requests - GROUP BY model_name, user_name - """) - results = cursor.fetchall() - - metric = CounterMetricFamily( - 'llm_requests_total', - 'Total number of requests by model and user', - labels=['model', 'user'] - ) - for model, user, total in results: - metric.add_metric([model, user], total) - yield metric - - # Total tokens by model and user (Counter) - cursor.execute(""" - SELECT model_name, user_name, COALESCE(SUM(tokens_used), 0) as total_tokens - FROM requests - GROUP BY model_name, user_name - """) - results = cursor.fetchall() - - metric = CounterMetricFamily( - 'llm_tokens_total', - 'Total tokens processed by model and user', - labels=['model', 'user'] - ) - for model, user, tokens in results: - metric.add_metric([model, user], tokens) - yield metric - - # Total CO2 emissions by model and user (Counter) - cursor.execute(""" - SELECT model_name, user_name, COALESCE(SUM(co2_emission), 0) as total_co2 - FROM requests - GROUP BY model_name, user_name - """) - results = cursor.fetchall() - - metric = CounterMetricFamily( - 'llm_co2_grams_total', - 'Total CO2 emissions in grams by model and user', - labels=['model', 'user'] - ) - for model, user, co2 in results: - metric.add_metric([model, user], co2) - yield metric - - # Sum of all 
latencies (Counter) - for calculating averages - cursor.execute(""" - SELECT model_name, user_name, - COALESCE(SUM(response_latency), 0) as total_latency - FROM requests - WHERE response_latency IS NOT NULL - GROUP BY model_name, user_name - """) - results = cursor.fetchall() - - metric = CounterMetricFamily( - 'llm_latency_seconds_total', - 'Sum of all response latencies by model and user', - labels=['model', 'user'] - ) - for model, user, latency_sum in results: - metric.add_metric([model, user], latency_sum) - yield metric - - def _get_gauge_metrics(self, cursor): - """Export current state metrics""" - - # Average latency by model (Gauge) - cursor.execute(""" - SELECT model_name, AVG(response_latency) as avg_latency - FROM requests - WHERE response_latency IS NOT NULL - GROUP BY model_name - """) - results = cursor.fetchall() - - metric = GaugeMetricFamily( - 'llm_latency_seconds_avg', - 'Average response latency by model', - labels=['model'] - ) - for model, avg_latency in results: - metric.add_metric([model], avg_latency or 0.0) - yield metric - - # Average latency by user (Gauge) - cursor.execute(""" - SELECT user_name, AVG(response_latency) as avg_latency - FROM requests - WHERE response_latency IS NOT NULL - GROUP BY user_name - """) - results = cursor.fetchall() - - metric = GaugeMetricFamily( - 'llm_latency_seconds_avg_by_user', - 'Average response latency by user', - labels=['user'] - ) - for user, avg_latency in results: - metric.add_metric([user], avg_latency or 0.0) - yield metric - - # Min/Max tokens per request by user (Gauge) - cursor.execute(""" - SELECT user_name, MIN(tokens_used), MAX(tokens_used) - FROM requests - WHERE tokens_used IS NOT NULL AND tokens_used > 0 - GROUP BY user_name - """) - results = cursor.fetchall() - - min_metric = GaugeMetricFamily( - 'llm_tokens_min', - 'Minimum tokens per request by user', - labels=['user'] - ) - max_metric = GaugeMetricFamily( - 'llm_tokens_max', - 'Maximum tokens per request by user', - 
labels=['user'] - ) - - for user, min_tokens, max_tokens in results: - min_metric.add_metric([user], min_tokens or 0) - max_metric.add_metric([user], max_tokens or 0) - - yield min_metric - yield max_metric - - # Recent activity (last hour) - Gauge - cursor.execute(""" - SELECT model_name, user_name, COUNT(*) as recent_requests - FROM requests - WHERE created_at >= datetime('now', '-1 hour') - GROUP BY model_name, user_name - """) - results = cursor.fetchall() - - metric = GaugeMetricFamily( - 'llm_requests_last_hour', - 'Number of requests in the last hour', - labels=['model', 'user'] - ) - for model, user, count in results: - metric.add_metric([model, user], count) - yield metric - -def main(): - import argparse - - parser = argparse.ArgumentParser(description='SQLite Prometheus Exporter for LLM Analytics') - parser.add_argument('--db-path', default='/data/requests.db', help='Path to SQLite database') - parser.add_argument('--port', type=int, default=8001, help='Port to serve metrics on') - - args = parser.parse_args() - - # Register the collector - REGISTRY.register(SQLiteCollector(args.db_path)) - - # Start the HTTP server - start_http_server(args.port) - print(f"Serving metrics on port {args.port}") - print(f"Database path: {args.db_path}") - print("Metrics available at http://localhost:{args.port}/metrics") - - try: - while True: - time.sleep(60) - except KeyboardInterrupt: - print("Exporter stopped") - -if __name__ == '__main__': - main() \ No newline at end of file diff --git a/backend/lib/auth.py b/backend/lib/auth.py new file mode 100644 index 0000000..5706457 --- /dev/null +++ b/backend/lib/auth.py @@ -0,0 +1,49 @@ +import secrets +import base64 +import os +import yaml +from fastapi import Request, Response + +# In your config.yaml loading section +with open("/config.yaml", "r") as f: + CONFIG = yaml.safe_load(f) + +# Get metrics auth from config or environment +METRICS_AUTH = CONFIG.get('metrics_auth', {}) +METRICS_USERNAME = 
METRICS_AUTH.get('username', 'admin') +METRICS_PASSWORD = METRICS_AUTH.get('password', 'change-me') + +def verify_metrics_auth(credentials: str) -> bool: + """Verify HTTP Basic Auth credentials for metrics endpoint""" + try: + decoded = base64.b64decode(credentials).decode("utf-8") + username, password = decoded.split(":", 1) + # Use secrets.compare_digest to prevent timing attacks + username_correct = secrets.compare_digest(username, METRICS_USERNAME) + password_correct = secrets.compare_digest(password, METRICS_PASSWORD) + return username_correct and password_correct + except Exception: + return False + +async def metrics_auth_middleware(request: Request, call_next): + """Middleware to protect /metrics endpoint with Basic Auth""" + if request.url.path.startswith("/metrics"): + auth_header = request.headers.get("Authorization") + + if not auth_header or not auth_header.startswith("Basic "): + return Response( + content="Unauthorized", + status_code=401, + headers={"WWW-Authenticate": 'Basic realm="Metrics"'} + ) + + credentials = auth_header[6:] # Remove "Basic " prefix + if not verify_metrics_auth(credentials): + return Response( + content="Invalid credentials", + status_code=401, + headers={"WWW-Authenticate": 'Basic realm="Metrics"'} + ) + + response = await call_next(request) + return response \ No newline at end of file diff --git a/backend/main.py b/backend/main.py index f9b0b87..45f656d 100644 --- a/backend/main.py +++ b/backend/main.py @@ -2,6 +2,7 @@ from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import StreamingResponse +from prometheus_client import make_asgi_app, Counter, Gauge import tempfile import io import time @@ -11,9 +12,41 @@ import base64 from lib.types import ChatCompletionRequest, EmbeddingInput, SpeechRequest, Message, MessageContent from lib.utils import estimate_tokens, extract_tokens_from_response +from lib.auth import metrics_auth_middleware 
from typing import Optional, List, Dict, Any, AsyncGenerator import aiohttp import lib.db + +request_by_model_count = Counter( + 'llm_requests_total', + 'Total number of requests by model and user', + ['model'] +) +request_by_user_count = Counter( + 'llm_requests_total_user', + 'Total number of requests by user', + ['user', 'model'] +) +token_by_request_count = Counter( + 'llm_tokens_total', + 'Total number of tokens used by model and user', + ['model'] +) +token_by_user_count = Counter( + 'llm_tokens_total_user', + 'Total number of tokens used by user and model', + ['user', 'model'] +) +latency_by_model = Gauge( + 'llm_request_latency_seconds', + 'Request latency in seconds by model', + ['model'] +) +latency_by_user = Gauge( + 'llm_request_latency_seconds_user', + 'Request latency in seconds by user', + ['user'] +) app = FastAPI( title="LLM Proxy API", description="Proxy API for Large Language Models with authentication and rate limiting", @@ -29,11 +62,15 @@ allow_methods=["*"], allow_headers=["*"], ) +app.middleware("http")(metrics_auth_middleware) +metrics_app = make_asgi_app() +app.mount("/metrics", metrics_app) # Security security = HTTPBearer() # Load configuration with open("/config.yaml", "r") as f: CONFIG = yaml.safe_load(f) + def get_model_config(model_name: str, user_key: Dict[str, Any]) -> Dict[str, Any]: if model_name not in user_key['models']: raise HTTPException( @@ -47,6 +84,73 @@ def get_model_config(model_name: str, user_key: Dict[str, Any]) -> Dict[str, Any status_code=status.HTTP_404_NOT_FOUND, detail="Model not found", ) + +async def fetch_image_as_base64(url: str) -> str: + """Fetch an image from a URL and convert it to base64 data URL""" + try: + headers = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36', + 'Accept': 'image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8', + 'Accept-Language': 'en-US,en;q=0.9', + 'Accept-Encoding': 'gzip, 
deflate, br', + 'Connection': 'keep-alive', + 'Referer': 'https://www.google.com/' + } + + async with aiohttp.ClientSession() as session: + async with session.get(url, headers=headers, timeout=aiohttp.ClientTimeout(total=30)) as resp: + if resp.status != 200: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"Failed to fetch image from URL: HTTP {resp.status}" + ) + + # Get content type + content_type = resp.headers.get('content-type', 'image/jpeg') + if not content_type.startswith('image/'): + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"URL does not point to an image (content-type: {content_type})" + ) + + # Read image data + image_data = await resp.read() + + # Validate image data is not empty + if len(image_data) == 0: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="Fetched image data is empty" + ) + + # Convert to base64 + base64_data = base64.b64encode(image_data).decode('utf-8') + + # Return as data URL + return f"data:{content_type};base64,{base64_data}" + + except HTTPException: + raise + except aiohttp.ClientError as e: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"Failed to fetch image from URL: {str(e)}" + ) + except Exception as e: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"Error processing image URL: {str(e)}" + ) + +# prometheus log functions +def log_metrics(model: str, user: str, tokens: int, latency: float): + request_by_model_count.labels(model=model).inc() + request_by_user_count.labels(user=user, model=model).inc() + token_by_request_count.labels(model=model).inc(tokens) + token_by_user_count.labels(user=user, model=model).inc(tokens) + latency_by_model.labels(model=model).set(latency) + latency_by_user.labels(user=user).set(latency) + # verify user token and model access def verify_token(credentials: HTTPAuthorizationCredentials = Security(security)): token = credentials.credentials @@ -58,11 
+162,13 @@ def verify_token(credentials: HTTPAuthorizationCredentials = Security(security)) detail="Invalid or missing token or insufficient permissions", headers={"WWW-Authenticate": "Bearer"}, ) + def get_user_from_token(token: str) -> Optional[str]: for key in CONFIG['keys']: if key['token'] == token: return key['name'] return None + def validate_vision_request(model_config: Dict[str, Any], messages: List[Message]): """Validate that vision requests are only made to vision-enabled models""" has_images = False @@ -83,6 +189,7 @@ def validate_vision_request(model_config: Dict[str, Any], messages: List[Message ) return has_images + def validate_image_content(content_item: MessageContent): """Validate image content in messages""" if content_item.type == "image_url" and content_item.image_url: @@ -101,7 +208,7 @@ def validate_image_content(content_item: MessageContent): detail=f"Invalid base64 image data: {str(e)}" ) - # Check if it's a URL (optional - you might want to disable this for security) + # Allow HTTP(S) URLs without additional validation elif url.startswith(("http://", "https://")): return True @@ -112,6 +219,7 @@ def validate_image_content(content_item: MessageContent): ) return False + def estimate_tokens_with_vision(messages: List[Message]) -> int: """Estimate tokens for messages that may contain images""" total_tokens = 0 @@ -131,6 +239,7 @@ def estimate_tokens_with_vision(messages: List[Message]) -> int: total_tokens += 1000 # Approximate - adjust based on your models return total_tokens + # Add this function after your other fetch functions async def fetch_speech(model_config: Dict[str, Any], request_data: Dict[str, Any]) -> bytes: url = f"{model_config['params']['api_base']}/audio/speech" @@ -155,6 +264,7 @@ async def fetch_speech(model_config: Dict[str, Any], request_data: Dict[str, Any # Return the audio bytes return await resp.read() + # Add this function after your other fetch functions async def fetch_transcription(model_config: Dict[str, Any], 
file_path: str, request_data: Dict[str, Any]) -> Dict[str, Any]: url = f"{model_config['params']['api_base']}/audio/transcriptions" @@ -186,6 +296,7 @@ async def fetch_transcription(model_config: Dict[str, Any], file_path: str, requ return {"text": await resp.text()} else: return await resp.json() + # fetch chat completion from the model API streaming depends on verify_token async def fetch_chat_completion_stream(model_config: Dict[str, Any], request_data: Dict[str, Any]) -> AsyncGenerator[str, None]: url = f"{model_config['params']['api_base']}/chat/completions" @@ -219,6 +330,7 @@ async def fetch_chat_completion_stream(model_config: Dict[str, Any], request_dat else: # Line already properly formatted yield f"{line_str}\n\n" + # fetch chat completion from the model API non-streaming async def fetch_chat_completion(model_config: Dict[str, Any], request_data: Dict[str, Any]) -> Dict[str, Any]: url = f"{model_config['params']['api_base']}/chat/completions" @@ -233,6 +345,7 @@ async def fetch_chat_completion(model_config: Dict[str, Any], request_data: Dict text = await resp.text() raise HTTPException(status_code=resp.status, detail=f"Model API error: {text}") return await resp.json() + # Add this new function after your existing fetch functions async def fetch_embeddings(model_config: Dict[str, Any], request_data: Dict[str, Any]) -> Dict[str, Any]: url = f"{model_config['params']['api_base']}/embeddings" # Note: /embeddings not /chat/completions @@ -248,6 +361,7 @@ async def fetch_embeddings(model_config: Dict[str, Any], request_data: Dict[str, text = await resp.text() raise HTTPException(status_code=resp.status, detail=f"Model API error: {text}") return await resp.json() + # /chat/completions endpoint @app.post("/v1/chat/completions") async def chat_completions(request: ChatCompletionRequest, user_key = Depends(verify_token)): @@ -256,38 +370,52 @@ async def chat_completions(request: ChatCompletionRequest, user_key = Depends(ve # Validate vision support has_images = 
validate_vision_request(model_config, request.messages) - # Validate image content if present + # Validate and convert image content if present if has_images: for message in request.messages: if isinstance(message.content, list): for content_item in message.content: if content_item.type == "image_url": validate_image_content(content_item) - + + # Convert HTTP(S) URLs to base64 for OpenAI + if content_item.image_url.url.startswith(("http://", "https://")): + content_item.image_url.url = await fetch_image_as_base64(content_item.image_url.url) + request_data = request.dict(by_alias=True) + + # Convert to OpenAI format for vision messages + if has_images: + openai_messages = [] + for message in request.messages: + if isinstance(message.content, list): + # Convert to proper OpenAI format + openai_content = [] + for item in message.content: + if item.type == "text": + openai_content.append({"type": "text", "text": item.text}) + elif item.type == "image_url": + openai_content.append({ + "type": "image_url", + "image_url": { + "url": item.image_url.url, + "detail": getattr(item.image_url, 'detail', 'auto') + } + }) + openai_messages.append({"role": message.role, "content": openai_content}) + else: + openai_messages.append({"role": message.role, "content": message.content}) + request_data["messages"] = openai_messages - # FIX: Use the actual model name from config - request_data["model"] = model_config['params']['model'] # Maps "devstral" to "devstral:24b" + # Use the actual model name from config + request_data["model"] = model_config['params']['model'] if model_config['params'].get('drop_params'): - # For vision models, keep more parameters - if has_images: - request_data = { - "model": request_data["model"], - "messages": request_data["messages"], - "stream": request_data.get("stream", False), - "max_tokens": request_data.get("max_tokens"), - "temperature": request_data.get("temperature") - } - else: - # Regular text-only request - request_data = { - "model": 
request_data["model"], - "messages": request_data["messages"], - "stream": request_data.get("stream", False) - } + # Keep OpenAI-compatible parameters only + allowed_params = ["model", "messages", "stream", "max_tokens", "temperature", "top_p", "n", "stop", "presence_penalty", "frequency_penalty", "user"] + request_data = {k: v for k, v in request_data.items() if k in allowed_params and v is not None} - # Handle max_input_tokens for vision models differently + # Don't truncate messages with images if model_config['params'].get('max_input_tokens') and not has_images: # Only truncate text-only messages total_tokens = 0 @@ -348,7 +476,9 @@ async def event_generator(): response_time = time.time() - start_time output_tokens = estimate_tokens(collected_response) if collected_response else 0 total_tokens = estimated_input_tokens + output_tokens - + # Log metrics + log_metrics(request.model, get_user_from_token(user_key['token']), total_tokens, response_time) + # Create a structured response for logging if collected_response: # Store the actual collected content @@ -436,6 +566,9 @@ async def event_generator(): content_summary.append({"type": "image_url", "summary": "Image provided"}) messages_for_log.append({"role": msg.role, "content": content_summary}) + + # Log metrics + log_metrics(request.model, get_user_from_token(user_key['token']), total_tokens, response_time) # log the request in the database lib.db.create_request( user_name=get_user_from_token(user_key['token']), @@ -447,6 +580,7 @@ async def event_generator(): response_latency=response_time ) return response_data + # /embeddings endpoint @app.post("/v1/embeddings") async def create_embedding(request: EmbeddingInput, user_key = Depends(verify_token)): @@ -484,7 +618,8 @@ async def create_embedding(request: EmbeddingInput, user_key = Depends(verify_to total_tokens = extract_tokens_from_response(response_data) if total_tokens == 0: total_tokens = estimated_tokens - + # Log metrics + log_metrics(request.model, 
get_user_from_token(user_key['token']), total_tokens, response_time) # log the request in the database lib.db.create_request( user_name=get_user_from_token(user_key['token']), @@ -497,6 +632,7 @@ async def create_embedding(request: EmbeddingInput, user_key = Depends(verify_to ) return response_data + # /audio/transcriptions endpoint @app.post("/v1/audio/transcriptions") async def create_transcription( @@ -567,7 +703,8 @@ async def create_transcription( transcription_text = response_data estimated_tokens = estimate_tokens(transcription_text) if transcription_text else 0 - + # Log metrics + log_metrics(model, get_user_from_token(user_key['token']), estimated_tokens, response_time) # Log the request in the database lib.db.create_request( user_name=get_user_from_token(user_key['token']), @@ -585,6 +722,7 @@ async def create_transcription( # Clean up temporary file if os.path.exists(temp_file_path): os.unlink(temp_file_path) + # /audio/speech endpoint @app.post("/v1/audio/speech") async def create_speech( @@ -636,6 +774,8 @@ async def create_speech( # Calculate response time response_time = time.time() - start_time + # Log metrics + log_metrics(request.model, get_user_from_token(user_key['token']), estimated_tokens, response_time) # Log the request in the database lib.db.create_request( user_name=get_user_from_token(user_key['token']), @@ -671,6 +811,7 @@ async def create_speech( except Exception as e: raise e + # list models endpoint @app.get("/v1/models") async def list_models(user_key = Depends(verify_token)): @@ -697,6 +838,7 @@ async def list_models(user_key = Depends(verify_token)): "object": "list", "data": models } + if __name__ == "__main__": import uvicorn uvicorn.run(app, host="0.0.0.0", port=8000) \ No newline at end of file diff --git a/config.example.yaml b/config.example.yaml index 3e081bd..47aec5e 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -1,4 +1,6 @@ -global: +metrics_auth: + username: admin + password: your-secure-password 
model_list: - model_name: gemma3 params: diff --git a/docker-compose.yaml b/docker-compose.yaml index c428f0c..d37f4a5 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -36,22 +36,6 @@ services: - ./backend:/app - ./config.yaml:/config.yaml - # SQLite Prometheus Exporter - prometheus-exporter: - profiles: - - api - build: - context: ./backend - dockerfile: Dockerfile - ports: - - "8001:8001" - volumes: - - ./backend:/app - - ./data:/data - depends_on: - - api - command: ["python", "exporter.py", "--db-path", "/data/requests.db", "--port", "8001"] - prometheus: profiles: - api diff --git a/prometheus.yml b/prometheus.yml index 81585c9..ad67d79 100644 --- a/prometheus.yml +++ b/prometheus.yml @@ -4,5 +4,8 @@ global: scrape_configs: - job_name: 'prometheus-exporter' static_configs: - - targets: ['prometheus-exporter:8001'] - scrape_interval: 5s \ No newline at end of file + - targets: ['api:8000'] + scrape_interval: 5s + basic_auth: + username: 'admin' + password: 'your-secure-password' \ No newline at end of file