diff --git a/.github/workflows/build_dev.yml b/.github/workflows/build_dev.yml new file mode 100644 index 0000000..9d39fab --- /dev/null +++ b/.github/workflows/build_dev.yml @@ -0,0 +1,33 @@ +name: Build and Push Docker Images + +on: + push: + branches: + - 'dev' + workflow_dispatch: + +jobs: + api: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v2 + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Login to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + + - name: Build and push + uses: docker/build-push-action@v5 + with: + push: true + context: ./backend + tags: erasme/ai-proxy:${{ github.ref == 'refs/heads/main' && 'latest' || 'dev' }} \ No newline at end of file diff --git a/.github/workflows/build_main.yml b/.github/workflows/build_main.yml new file mode 100644 index 0000000..af9efa5 --- /dev/null +++ b/.github/workflows/build_main.yml @@ -0,0 +1,59 @@ +name: Update Version and build main branch + +on: + pull_request: + types: + - closed + branches: + - main # Fires whenever a PR targeting 'main' is closed (merged or not); the per-step "merged == true" checks below gate the actual work. 
+ +permissions: + contents: write + +jobs: + create_tag: + runs-on: ubuntu-latest + steps: + ### Step 1: Check out the repository + - name: Checkout Code + if: ${{ github.event.pull_request.merged == true }} + uses: actions/checkout@v3 + + ### Step 2: Configure Git + - name: Configure Git + if: ${{ github.event.pull_request.merged == true }} + run: | + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + + ### Step 3: Bump the Version and Create Tag + - name: Bump Version and Create Tag + if: ${{ github.event.pull_request.merged == true }} + id: version + uses: anothrNick/github-tag-action@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + INITIAL_VERSION: 0.1.0 + DEFAULT_BUMP: patch + + - name: Checkout + uses: actions/checkout@v2 + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Login to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + + - name: Build and push + uses: docker/build-push-action@v5 + with: + push: true + context: ./backend + tags: erasme/ai-proxy:latest , erasme/ai-proxy:${{ steps.version.outputs.new_tag }} \ No newline at end of file diff --git a/.gitignore b/.gitignore index 4d69dff..0e9aced 100644 --- a/.gitignore +++ b/.gitignore @@ -4,4 +4,5 @@ config.yaml *.db emissions.csv backend/.~lock.emissions.csv# -data/ \ No newline at end of file +data/ +venv \ No newline at end of file diff --git a/backend/exporter.py b/backend/exporter.py deleted file mode 100644 index 8b25091..0000000 --- a/backend/exporter.py +++ /dev/null @@ -1,211 +0,0 @@ -from prometheus_client import start_http_server, REGISTRY -from prometheus_client.core import GaugeMetricFamily, CounterMetricFamily -from prometheus_client.registry import Collector -import sqlite3 -import time -from typing import Dict, List, Tuple - -class 
SQLiteCollector(Collector): - def __init__(self, db_path: str): - self.db_path = db_path - - def get_db_connection(self): - return sqlite3.connect(self.db_path) - - def collect(self): - conn = self.get_db_connection() - cursor = conn.cursor() - - try: - # Export cumulative counters (these will be scraped regularly by Prometheus) - yield from self._get_counter_metrics(cursor) - - # Export current state gauges - yield from self._get_gauge_metrics(cursor) - - finally: - conn.close() - - def _get_counter_metrics(self, cursor): - """Export cumulative counters - Prometheus will calculate rates from these""" - - # Total requests by model and user (Counter) - cursor.execute(""" - SELECT model_name, user_name, COUNT(*) as total - FROM requests - GROUP BY model_name, user_name - """) - results = cursor.fetchall() - - metric = CounterMetricFamily( - 'llm_requests_total', - 'Total number of requests by model and user', - labels=['model', 'user'] - ) - for model, user, total in results: - metric.add_metric([model, user], total) - yield metric - - # Total tokens by model and user (Counter) - cursor.execute(""" - SELECT model_name, user_name, COALESCE(SUM(tokens_used), 0) as total_tokens - FROM requests - GROUP BY model_name, user_name - """) - results = cursor.fetchall() - - metric = CounterMetricFamily( - 'llm_tokens_total', - 'Total tokens processed by model and user', - labels=['model', 'user'] - ) - for model, user, tokens in results: - metric.add_metric([model, user], tokens) - yield metric - - # Total CO2 emissions by model and user (Counter) - cursor.execute(""" - SELECT model_name, user_name, COALESCE(SUM(co2_emission), 0) as total_co2 - FROM requests - GROUP BY model_name, user_name - """) - results = cursor.fetchall() - - metric = CounterMetricFamily( - 'llm_co2_grams_total', - 'Total CO2 emissions in grams by model and user', - labels=['model', 'user'] - ) - for model, user, co2 in results: - metric.add_metric([model, user], co2) - yield metric - - # Sum of all 
latencies (Counter) - for calculating averages - cursor.execute(""" - SELECT model_name, user_name, - COALESCE(SUM(response_latency), 0) as total_latency - FROM requests - WHERE response_latency IS NOT NULL - GROUP BY model_name, user_name - """) - results = cursor.fetchall() - - metric = CounterMetricFamily( - 'llm_latency_seconds_total', - 'Sum of all response latencies by model and user', - labels=['model', 'user'] - ) - for model, user, latency_sum in results: - metric.add_metric([model, user], latency_sum) - yield metric - - def _get_gauge_metrics(self, cursor): - """Export current state metrics""" - - # Average latency by model (Gauge) - cursor.execute(""" - SELECT model_name, AVG(response_latency) as avg_latency - FROM requests - WHERE response_latency IS NOT NULL - GROUP BY model_name - """) - results = cursor.fetchall() - - metric = GaugeMetricFamily( - 'llm_latency_seconds_avg', - 'Average response latency by model', - labels=['model'] - ) - for model, avg_latency in results: - metric.add_metric([model], avg_latency or 0.0) - yield metric - - # Average latency by user (Gauge) - cursor.execute(""" - SELECT user_name, AVG(response_latency) as avg_latency - FROM requests - WHERE response_latency IS NOT NULL - GROUP BY user_name - """) - results = cursor.fetchall() - - metric = GaugeMetricFamily( - 'llm_latency_seconds_avg_by_user', - 'Average response latency by user', - labels=['user'] - ) - for user, avg_latency in results: - metric.add_metric([user], avg_latency or 0.0) - yield metric - - # Min/Max tokens per request by user (Gauge) - cursor.execute(""" - SELECT user_name, MIN(tokens_used), MAX(tokens_used) - FROM requests - WHERE tokens_used IS NOT NULL AND tokens_used > 0 - GROUP BY user_name - """) - results = cursor.fetchall() - - min_metric = GaugeMetricFamily( - 'llm_tokens_min', - 'Minimum tokens per request by user', - labels=['user'] - ) - max_metric = GaugeMetricFamily( - 'llm_tokens_max', - 'Maximum tokens per request by user', - 
labels=['user'] - ) - - for user, min_tokens, max_tokens in results: - min_metric.add_metric([user], min_tokens or 0) - max_metric.add_metric([user], max_tokens or 0) - - yield min_metric - yield max_metric - - # Recent activity (last hour) - Gauge - cursor.execute(""" - SELECT model_name, user_name, COUNT(*) as recent_requests - FROM requests - WHERE created_at >= datetime('now', '-1 hour') - GROUP BY model_name, user_name - """) - results = cursor.fetchall() - - metric = GaugeMetricFamily( - 'llm_requests_last_hour', - 'Number of requests in the last hour', - labels=['model', 'user'] - ) - for model, user, count in results: - metric.add_metric([model, user], count) - yield metric - -def main(): - import argparse - - parser = argparse.ArgumentParser(description='SQLite Prometheus Exporter for LLM Analytics') - parser.add_argument('--db-path', default='/data/requests.db', help='Path to SQLite database') - parser.add_argument('--port', type=int, default=8001, help='Port to serve metrics on') - - args = parser.parse_args() - - # Register the collector - REGISTRY.register(SQLiteCollector(args.db_path)) - - # Start the HTTP server - start_http_server(args.port) - print(f"Serving metrics on port {args.port}") - print(f"Database path: {args.db_path}") - print("Metrics available at http://localhost:{args.port}/metrics") - - try: - while True: - time.sleep(60) - except KeyboardInterrupt: - print("Exporter stopped") - -if __name__ == '__main__': - main() \ No newline at end of file diff --git a/backend/lib/auth.py b/backend/lib/auth.py new file mode 100644 index 0000000..5706457 --- /dev/null +++ b/backend/lib/auth.py @@ -0,0 +1,49 @@ +import secrets +import base64 +import os +import yaml +from fastapi import Request, Response + +# In your config.yaml loading section +with open("/config.yaml", "r") as f: + CONFIG = yaml.safe_load(f) + +# Get metrics auth from config or environment +METRICS_AUTH = CONFIG.get('metrics_auth', {}) +METRICS_USERNAME = 
METRICS_AUTH.get('username', 'admin') +METRICS_PASSWORD = METRICS_AUTH.get('password', 'change-me') + +def verify_metrics_auth(credentials: str) -> bool: + """Verify HTTP Basic Auth credentials for metrics endpoint""" + try: + decoded = base64.b64decode(credentials).decode("utf-8") + username, password = decoded.split(":", 1) + # Use secrets.compare_digest to prevent timing attacks + username_correct = secrets.compare_digest(username, METRICS_USERNAME) + password_correct = secrets.compare_digest(password, METRICS_PASSWORD) + return username_correct and password_correct + except Exception: + return False + +async def metrics_auth_middleware(request: Request, call_next): + """Middleware to protect /metrics endpoint with Basic Auth""" + if request.url.path.startswith("/metrics"): + auth_header = request.headers.get("Authorization") + + if not auth_header or not auth_header.startswith("Basic "): + return Response( + content="Unauthorized", + status_code=401, + headers={"WWW-Authenticate": 'Basic realm="Metrics"'} + ) + + credentials = auth_header[6:] # Remove "Basic " prefix + if not verify_metrics_auth(credentials): + return Response( + content="Invalid credentials", + status_code=401, + headers={"WWW-Authenticate": 'Basic realm="Metrics"'} + ) + + response = await call_next(request) + return response \ No newline at end of file diff --git a/backend/main.py b/backend/main.py index f9b0b87..45f656d 100644 --- a/backend/main.py +++ b/backend/main.py @@ -2,6 +2,7 @@ from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import StreamingResponse +from prometheus_client import make_asgi_app, Counter, Gauge import tempfile import io import time @@ -11,9 +12,41 @@ import base64 from lib.types import ChatCompletionRequest, EmbeddingInput, SpeechRequest, Message, MessageContent from lib.utils import estimate_tokens, extract_tokens_from_response +from lib.auth import metrics_auth_middleware 
from typing import Optional, List, Dict, Any, AsyncGenerator import aiohttp import lib.db + +request_by_model_count = Counter( + 'llm_requests_total', + 'Total number of requests by model and user', + ['model'] +) +request_by_user_count = Counter( + 'llm_requests_total_user', + 'Total number of requests by user', + ['user', 'model'] +) +token_by_request_count = Counter( + 'llm_tokens_total', + 'Total number of tokens used by model and user', + ['model'] +) +token_by_user_count = Counter( + 'llm_tokens_total_user', + 'Total number of tokens used by user and model', + ['user', 'model'] +) +latency_by_model = Gauge( + 'llm_request_latency_seconds', + 'Request latency in seconds by model', + ['model'] +) +latency_by_user = Gauge( + 'llm_request_latency_seconds_user', + 'Request latency in seconds by user', + ['user'] +) app = FastAPI( title="LLM Proxy API", description="Proxy API for Large Language Models with authentication and rate limiting", @@ -29,11 +62,15 @@ allow_methods=["*"], allow_headers=["*"], ) +app.middleware("http")(metrics_auth_middleware) +metrics_app = make_asgi_app() +app.mount("/metrics", metrics_app) # Security security = HTTPBearer() # Load configuration with open("/config.yaml", "r") as f: CONFIG = yaml.safe_load(f) + def get_model_config(model_name: str, user_key: Dict[str, Any]) -> Dict[str, Any]: if model_name not in user_key['models']: raise HTTPException( @@ -47,6 +84,73 @@ def get_model_config(model_name: str, user_key: Dict[str, Any]) -> Dict[str, Any status_code=status.HTTP_404_NOT_FOUND, detail="Model not found", ) + +async def fetch_image_as_base64(url: str) -> str: + """Fetch an image from a URL and convert it to base64 data URL""" + try: + headers = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36', + 'Accept': 'image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8', + 'Accept-Language': 'en-US,en;q=0.9', + 'Accept-Encoding': 'gzip, 
deflate, br', + 'Connection': 'keep-alive', + 'Referer': 'https://www.google.com/' + } + + async with aiohttp.ClientSession() as session: + async with session.get(url, headers=headers, timeout=aiohttp.ClientTimeout(total=30)) as resp: + if resp.status != 200: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"Failed to fetch image from URL: HTTP {resp.status}" + ) + + # Get content type + content_type = resp.headers.get('content-type', 'image/jpeg') + if not content_type.startswith('image/'): + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"URL does not point to an image (content-type: {content_type})" + ) + + # Read image data + image_data = await resp.read() + + # Validate image data is not empty + if len(image_data) == 0: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="Fetched image data is empty" + ) + + # Convert to base64 + base64_data = base64.b64encode(image_data).decode('utf-8') + + # Return as data URL + return f"data:{content_type};base64,{base64_data}" + + except HTTPException: + raise + except aiohttp.ClientError as e: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"Failed to fetch image from URL: {str(e)}" + ) + except Exception as e: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"Error processing image URL: {str(e)}" + ) + +# prometheus log functions +def log_metrics(model: str, user: str, tokens: int, latency: float): + request_by_model_count.labels(model=model).inc() + request_by_user_count.labels(user=user, model=model).inc() + token_by_request_count.labels(model=model).inc(tokens) + token_by_user_count.labels(user=user, model=model).inc(tokens) + latency_by_model.labels(model=model).set(latency) + latency_by_user.labels(user=user).set(latency) + # verify user token and model access def verify_token(credentials: HTTPAuthorizationCredentials = Security(security)): token = credentials.credentials @@ -58,11 
+162,13 @@ def verify_token(credentials: HTTPAuthorizationCredentials = Security(security)) detail="Invalid or missing token or insufficient permissions", headers={"WWW-Authenticate": "Bearer"}, ) + def get_user_from_token(token: str) -> Optional[str]: for key in CONFIG['keys']: if key['token'] == token: return key['name'] return None + def validate_vision_request(model_config: Dict[str, Any], messages: List[Message]): """Validate that vision requests are only made to vision-enabled models""" has_images = False @@ -83,6 +189,7 @@ def validate_vision_request(model_config: Dict[str, Any], messages: List[Message ) return has_images + def validate_image_content(content_item: MessageContent): """Validate image content in messages""" if content_item.type == "image_url" and content_item.image_url: @@ -101,7 +208,7 @@ def validate_image_content(content_item: MessageContent): detail=f"Invalid base64 image data: {str(e)}" ) - # Check if it's a URL (optional - you might want to disable this for security) + # Allow HTTP(S) URLs without additional validation elif url.startswith(("http://", "https://")): return True @@ -112,6 +219,7 @@ def validate_image_content(content_item: MessageContent): ) return False + def estimate_tokens_with_vision(messages: List[Message]) -> int: """Estimate tokens for messages that may contain images""" total_tokens = 0 @@ -131,6 +239,7 @@ def estimate_tokens_with_vision(messages: List[Message]) -> int: total_tokens += 1000 # Approximate - adjust based on your models return total_tokens + # Add this function after your other fetch functions async def fetch_speech(model_config: Dict[str, Any], request_data: Dict[str, Any]) -> bytes: url = f"{model_config['params']['api_base']}/audio/speech" @@ -155,6 +264,7 @@ async def fetch_speech(model_config: Dict[str, Any], request_data: Dict[str, Any # Return the audio bytes return await resp.read() + # Add this function after your other fetch functions async def fetch_transcription(model_config: Dict[str, Any], 
file_path: str, request_data: Dict[str, Any]) -> Dict[str, Any]: url = f"{model_config['params']['api_base']}/audio/transcriptions" @@ -186,6 +296,7 @@ async def fetch_transcription(model_config: Dict[str, Any], file_path: str, requ return {"text": await resp.text()} else: return await resp.json() + # fetch chat completion from the model API streaming depends on verify_token async def fetch_chat_completion_stream(model_config: Dict[str, Any], request_data: Dict[str, Any]) -> AsyncGenerator[str, None]: url = f"{model_config['params']['api_base']}/chat/completions" @@ -219,6 +330,7 @@ async def fetch_chat_completion_stream(model_config: Dict[str, Any], request_dat else: # Line already properly formatted yield f"{line_str}\n\n" + # fetch chat completion from the model API non-streaming async def fetch_chat_completion(model_config: Dict[str, Any], request_data: Dict[str, Any]) -> Dict[str, Any]: url = f"{model_config['params']['api_base']}/chat/completions" @@ -233,6 +345,7 @@ async def fetch_chat_completion(model_config: Dict[str, Any], request_data: Dict text = await resp.text() raise HTTPException(status_code=resp.status, detail=f"Model API error: {text}") return await resp.json() + # Add this new function after your existing fetch functions async def fetch_embeddings(model_config: Dict[str, Any], request_data: Dict[str, Any]) -> Dict[str, Any]: url = f"{model_config['params']['api_base']}/embeddings" # Note: /embeddings not /chat/completions @@ -248,6 +361,7 @@ async def fetch_embeddings(model_config: Dict[str, Any], request_data: Dict[str, text = await resp.text() raise HTTPException(status_code=resp.status, detail=f"Model API error: {text}") return await resp.json() + # /chat/completions endpoint @app.post("/v1/chat/completions") async def chat_completions(request: ChatCompletionRequest, user_key = Depends(verify_token)): @@ -256,38 +370,52 @@ async def chat_completions(request: ChatCompletionRequest, user_key = Depends(ve # Validate vision support has_images = 
validate_vision_request(model_config, request.messages) - # Validate image content if present + # Validate and convert image content if present if has_images: for message in request.messages: if isinstance(message.content, list): for content_item in message.content: if content_item.type == "image_url": validate_image_content(content_item) - + + # Convert HTTP(S) URLs to base64 for OpenAI + if content_item.image_url.url.startswith(("http://", "https://")): + content_item.image_url.url = await fetch_image_as_base64(content_item.image_url.url) + request_data = request.dict(by_alias=True) + + # Convert to OpenAI format for vision messages + if has_images: + openai_messages = [] + for message in request.messages: + if isinstance(message.content, list): + # Convert to proper OpenAI format + openai_content = [] + for item in message.content: + if item.type == "text": + openai_content.append({"type": "text", "text": item.text}) + elif item.type == "image_url": + openai_content.append({ + "type": "image_url", + "image_url": { + "url": item.image_url.url, + "detail": getattr(item.image_url, 'detail', 'auto') + } + }) + openai_messages.append({"role": message.role, "content": openai_content}) + else: + openai_messages.append({"role": message.role, "content": message.content}) + request_data["messages"] = openai_messages - # FIX: Use the actual model name from config - request_data["model"] = model_config['params']['model'] # Maps "devstral" to "devstral:24b" + # Use the actual model name from config + request_data["model"] = model_config['params']['model'] if model_config['params'].get('drop_params'): - # For vision models, keep more parameters - if has_images: - request_data = { - "model": request_data["model"], - "messages": request_data["messages"], - "stream": request_data.get("stream", False), - "max_tokens": request_data.get("max_tokens"), - "temperature": request_data.get("temperature") - } - else: - # Regular text-only request - request_data = { - "model": 
request_data["model"], - "messages": request_data["messages"], - "stream": request_data.get("stream", False) - } + # Keep OpenAI-compatible parameters only + allowed_params = ["model", "messages", "stream", "max_tokens", "temperature", "top_p", "n", "stop", "presence_penalty", "frequency_penalty", "user"] + request_data = {k: v for k, v in request_data.items() if k in allowed_params and v is not None} - # Handle max_input_tokens for vision models differently + # Don't truncate messages with images if model_config['params'].get('max_input_tokens') and not has_images: # Only truncate text-only messages total_tokens = 0 @@ -348,7 +476,9 @@ async def event_generator(): response_time = time.time() - start_time output_tokens = estimate_tokens(collected_response) if collected_response else 0 total_tokens = estimated_input_tokens + output_tokens - + # Log metrics + log_metrics(request.model, get_user_from_token(user_key['token']), total_tokens, response_time) + # Create a structured response for logging if collected_response: # Store the actual collected content @@ -436,6 +566,9 @@ async def event_generator(): content_summary.append({"type": "image_url", "summary": "Image provided"}) messages_for_log.append({"role": msg.role, "content": content_summary}) + + # Log metrics + log_metrics(request.model, get_user_from_token(user_key['token']), total_tokens, response_time) # log the request in the database lib.db.create_request( user_name=get_user_from_token(user_key['token']), @@ -447,6 +580,7 @@ async def event_generator(): response_latency=response_time ) return response_data + # /embeddings endpoint @app.post("/v1/embeddings") async def create_embedding(request: EmbeddingInput, user_key = Depends(verify_token)): @@ -484,7 +618,8 @@ async def create_embedding(request: EmbeddingInput, user_key = Depends(verify_to total_tokens = extract_tokens_from_response(response_data) if total_tokens == 0: total_tokens = estimated_tokens - + # Log metrics + log_metrics(request.model, 
get_user_from_token(user_key['token']), total_tokens, response_time) # log the request in the database lib.db.create_request( user_name=get_user_from_token(user_key['token']), @@ -497,6 +632,7 @@ async def create_embedding(request: EmbeddingInput, user_key = Depends(verify_to ) return response_data + # /audio/transcriptions endpoint @app.post("/v1/audio/transcriptions") async def create_transcription( @@ -567,7 +703,8 @@ async def create_transcription( transcription_text = response_data estimated_tokens = estimate_tokens(transcription_text) if transcription_text else 0 - + # Log metrics + log_metrics(model, get_user_from_token(user_key['token']), estimated_tokens, response_time) # Log the request in the database lib.db.create_request( user_name=get_user_from_token(user_key['token']), @@ -585,6 +722,7 @@ async def create_transcription( # Clean up temporary file if os.path.exists(temp_file_path): os.unlink(temp_file_path) + # /audio/speech endpoint @app.post("/v1/audio/speech") async def create_speech( @@ -636,6 +774,8 @@ async def create_speech( # Calculate response time response_time = time.time() - start_time + # Log metrics + log_metrics(request.model, get_user_from_token(user_key['token']), estimated_tokens, response_time) # Log the request in the database lib.db.create_request( user_name=get_user_from_token(user_key['token']), @@ -671,6 +811,7 @@ async def create_speech( except Exception as e: raise e + # list models endpoint @app.get("/v1/models") async def list_models(user_key = Depends(verify_token)): @@ -697,6 +838,7 @@ async def list_models(user_key = Depends(verify_token)): "object": "list", "data": models } + if __name__ == "__main__": import uvicorn uvicorn.run(app, host="0.0.0.0", port=8000) \ No newline at end of file diff --git a/config.example.yaml b/config.example.yaml index 3e081bd..47aec5e 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -1,4 +1,6 @@ -global: +metrics_auth: + username: admin + password: your-secure-password 
model_list: - model_name: gemma3 params: diff --git a/docker-compose.yaml b/docker-compose.yaml index c428f0c..d37f4a5 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -36,22 +36,6 @@ services: - ./backend:/app - ./config.yaml:/config.yaml - # SQLite Prometheus Exporter - prometheus-exporter: - profiles: - - api - build: - context: ./backend - dockerfile: Dockerfile - ports: - - "8001:8001" - volumes: - - ./backend:/app - - ./data:/data - depends_on: - - api - command: ["python", "exporter.py", "--db-path", "/data/requests.db", "--port", "8001"] - prometheus: profiles: - api diff --git a/prometheus.yml b/prometheus.yml index 81585c9..ad67d79 100644 --- a/prometheus.yml +++ b/prometheus.yml @@ -4,5 +4,8 @@ global: scrape_configs: - job_name: 'prometheus-exporter' static_configs: - - targets: ['prometheus-exporter:8001'] - scrape_interval: 5s \ No newline at end of file + - targets: ['api:8000'] + scrape_interval: 5s + basic_auth: + username: 'admin' + password: 'your-secure-password' \ No newline at end of file