
Commit 735fdc4

Author: shieldx-bot
Parent: b9065cb

ci(test): Add ML CI tests + load test harness

- GitHub Actions workflow to install requirements-test and run the full suite
- Asyncio- and Locust-based HTTP benchmarks for concurrency/latency
- Fix dl_service.py training endpoint syntax and validation
- Fix validate_code.py print statement
- Add README_TESTING with steps and performance targets

File tree: 6 files changed, +339 −14 lines

- .github/workflows/ml-tests.yml
- services/shieldx-ml/README_TESTING.md
- services/shieldx-ml/ml-service/dl_service.py
- services/shieldx-ml/tools/bench_http.py
- services/shieldx-ml/tools/locustfile.py
- services/shieldx-ml/validate_code.py

.github/workflows/ml-tests.yml

Lines changed: 44 additions & 0 deletions

```yaml
name: ML Service Tests

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]

jobs:
  tests:
    runs-on: ubuntu-latest
    timeout-minutes: 30

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Python 3.11
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Install system deps
        run: |
          sudo apt-get update
          sudo apt-get install -y python3-venv

      - name: Install Python dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r services/shieldx-ml/requirements-test.txt

      - name: Run ML test suite
        working-directory: services/shieldx-ml
        run: |
          python run_tests.py

      - name: Upload test report
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: ml-test-report
          path: services/shieldx-ml/test_report.json
          if-no-files-found: warn
```
services/shieldx-ml/README_TESTING.md

Lines changed: 61 additions & 0 deletions

# ShieldX ML Service - Testing & Performance

This doc explains how to validate the ML service's correctness and evaluate it against the performance targets below.

## Unit & Integration Tests

- The full suite lives under `services/shieldx-ml/tests` and `services/shieldx-ml/ml-service/tests`.
- Run locally (requires Python 3.11 and dependencies):

```bash
cd services/shieldx-ml
python3 -m venv .venv && source .venv/bin/activate
pip install -r requirements-test.txt
python run_tests.py
```

- Or run a quick static validation without heavy deps:

```bash
python3 validate_code.py
```

- CI runs these automatically via GitHub Actions: `.github/workflows/ml-tests.yml`.

## Load & Concurrency Tests

Targets:

- 10,000 concurrent requests
- < 100 ms latency (p99)
- 99% detection rate (recall)

Tools provided:

- `tools/locustfile.py`: Locust user model for HTTP inference.
- `tools/bench_http.py`: asyncio benchmark for high-concurrency testing.

Example (asyncio benchmark):

```bash
# Start the DL service first (port 8001)
python3 ml-service/dl_service.py &
# In another terminal:
python3 tools/bench_http.py --base-url http://localhost:8001 --model autoencoder_demo --concurrency 1000 --rpc 10
```

Example (Locust):

```bash
pip install locust
locust -f tools/locustfile.py --host http://localhost:8001
```
## Production Readiness Notes

- To sustain 10k concurrent requests with < 100 ms p99, deploy with:
  - Gunicorn + Uvicorn workers or an ASGI framework (Quart/FastAPI) for async IO (a minimal sketch follows this list)
  - Dynamic batching and GPU acceleration enabled via `inference_engine.py`
  - A Redis-backed cache and model warmup
  - Horizontal autoscaling (K8s HPA) and a gateway (HAProxy/NGINX) with keep-alive
  - Optionally, Triton Inference Server + TensorRT for GPU acceleration
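For illustration only, here is a minimal async-IO serving sketch in the FastAPI style; it is not the service's actual implementation (that lives in `ml-service/dl_service.py`), and the placeholder scoring logic stands in for the real model registry:

```python
# Minimal ASGI serving sketch (illustrative; not the real service).
# Run with: gunicorn app:app -k uvicorn.workers.UvicornWorker -w 4 --keep-alive 5
from fastapi import FastAPI
from pydantic import BaseModel

app = FastAPI()

class PredictRequest(BaseModel):
    data: list[list[float]]    # batch of feature vectors
    return_proba: bool = False

@app.post("/models/{model_name}/predict")
async def predict(model_name: str, req: PredictRequest):
    # Placeholder scoring; a real deployment would dispatch to the model
    # registry / inference_engine with dynamic batching
    scores = [sum(row) / len(row) for row in req.data]
    return {"model": model_name, "predictions": scores}

@app.get("/health")
async def health():
    return {"status": "ok"}
```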
- Hitting 99% detection (recall) requires calibrated decision thresholds and balanced datasets; validate using the `evaluate` endpoint against realistic traffic distributions. A calibration sketch follows the next bullet.
- See docs/ML_MASTER_ROADMAP.md for completed optimization and monitoring features.
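As a sketch of what threshold calibration can look like (this helper is hypothetical, not part of the service), the following picks the highest anomaly-score threshold that still achieves the target recall on labeled validation data, maximizing precision subject to the recall constraint:

```python
# Hypothetical threshold calibration: choose the highest anomaly-score
# threshold that still yields >= 99% recall on a labeled validation set.
# `scores` and `labels` are assumed exports (e.g., from the evaluate endpoint).
import numpy as np

def calibrate_threshold(scores: np.ndarray, labels: np.ndarray,
                        target_recall: float = 0.99) -> float:
    order = np.argsort(scores)[::-1]               # most anomalous first
    sorted_scores, sorted_labels = scores[order], labels[order]
    recall = np.cumsum(sorted_labels) / labels.sum()
    idx = int(np.argmax(recall >= target_recall))  # first index meeting target
    return float(sorted_scores[idx])

# Example with synthetic validation data
rng = np.random.default_rng(0)
scores = np.concatenate([rng.normal(0, 1, 900), rng.normal(3, 1, 100)])
labels = np.concatenate([np.zeros(900), np.ones(100)])
thr = calibrate_threshold(scores, labels)
achieved = ((scores >= thr) & (labels == 1)).sum() / labels.sum()
print(f"threshold={thr:.3f}, recall={achieved:.2%}")
```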

services/shieldx-ml/ml-service/dl_service.py

Lines changed: 14 additions & 14 deletions
```diff
@@ -149,23 +149,23 @@ def train_model(model_name: str):
             # Supervised models need labels
             if training_labels is None:
                 return jsonify({'error': f'{model_type} requires training_labels'}), 400
-            model.fit(training_data, training_labels, **training_params)
+            model.fit(
+                training_data,
+                training_labels,
+                epochs=training_params.get('epochs', 100),
+                batch_size=training_params.get('batch_size', 64),
+                validation_split=training_params.get('validation_split', 0.2),
+                early_stopping_patience=training_params.get('early_stopping_patience', 10)
+            )
         else:
             # Unsupervised models
-            model.fit(training_data, **training_params)
+            model.fit(
+                training_data,
+                epochs=training_params.get('epochs', 100),
+                batch_size=training_params.get('batch_size', 256),
+                validation_split=training_params.get('validation_split', 0.2),
+                early_stopping_patience=training_params.get('early_stopping_patience', 10)
             )
-        else:
-            return jsonify({'error': f'Unknown model type: {model_type}'}), 400
-
-        # Train model
-        logger.info(f"Training {model_type} model: {model_name}")
-        model.fit(
-            training_data,
-            epochs=training_params.get('epochs', 100),
-            batch_size=training_params.get('batch_size', 256 if model_type == 'autoencoder' else 64),
-            validation_split=training_params.get('validation_split', 0.2),
-            early_stopping_patience=training_params.get('early_stopping_patience', 10)
-        )
 
         # Save model
         model_path = os.path.join(MODEL_DIR, f"{model_name}.pt")
```
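The hunk above passes explicit keyword arguments from `training_params` instead of splatting the raw dict. For reference, a minimal client sketch for this endpoint follows; the `POST /models/<name>/train` route is an assumption mirroring the `/models/<name>/predict` route used by the benchmark tools, and the `model_type` payload field is likewise inferred from the handler, so verify both against the route decorators in `dl_service.py`:

```python
# Hypothetical training-endpoint client; the /train route and the
# model_type payload field are assumptions inferred from the handler above.
import numpy as np
import requests

payload = {
    "model_type": "autoencoder",                         # unsupervised: no labels required
    "training_data": np.random.randn(1000, 50).tolist(),
    "training_params": {
        "epochs": 50,
        "batch_size": 256,
        "validation_split": 0.2,
        "early_stopping_patience": 10,
    },
}
resp = requests.post(
    "http://localhost:8001/models/autoencoder_demo/train",
    json=payload,
    timeout=600,  # training can take a while
)
print(resp.status_code, resp.json())
```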
services/shieldx-ml/tools/bench_http.py

Lines changed: 58 additions & 0 deletions

```python
#!/usr/bin/env python3
import argparse
import asyncio
import time

import aiohttp
import numpy as np


async def predict(session, url, batch=32, input_dim=50):
    # One inference request with a random batch payload
    data = np.random.randn(batch, input_dim).tolist()
    payload = {"data": data, "return_proba": False}
    async with session.post(url, json=payload) as resp:
        await resp.text()
        return resp.status


async def run_benchmark(base_url: str, model_name: str, concurrency: int = 1000, requests_per_client: int = 10):
    url = f"{base_url}/models/{model_name}/predict"
    timeout = aiohttp.ClientTimeout(total=30)
    connector = aiohttp.TCPConnector(limit=0)  # no client-side connection cap
    async with aiohttp.ClientSession(timeout=timeout, connector=connector) as session:
        latencies = []

        async def worker():
            for _ in range(requests_per_client):
                start = time.perf_counter_ns()
                status = await predict(session, url)
                end = time.perf_counter_ns()
                if status == 200:
                    latencies.append((end - start) / 1e6)  # ns -> ms

        tasks = [asyncio.create_task(worker()) for _ in range(concurrency)]
        t0 = time.time()
        await asyncio.gather(*tasks)
        elapsed = time.time() - t0

        if not latencies:
            print("No successful requests.")
            return

        latencies.sort()
        p50 = latencies[int(0.50 * len(latencies))]
        p90 = latencies[int(0.90 * len(latencies))]
        p99 = latencies[int(0.99 * len(latencies))]
        print(f"Requests: {len(latencies)} in {elapsed:.2f}s, RPS={len(latencies)/elapsed:.1f}")
        print(f"Latency ms: p50={p50:.2f}, p90={p90:.2f}, p99={p99:.2f}")


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--base-url", default="http://localhost:8001")
    parser.add_argument("--model", default="autoencoder_demo")
    parser.add_argument("--concurrency", type=int, default=1000)
    parser.add_argument("--rpc", type=int, default=10, help="requests per client")
    args = parser.parse_args()
    asyncio.run(run_benchmark(args.base_url, args.model, args.concurrency, args.rpc))
```
services/shieldx-ml/tools/locustfile.py

Lines changed: 23 additions & 0 deletions

```python
import json

import numpy as np
from locust import HttpUser, task, between


class InferenceUser(HttpUser):
    wait_time = between(0.001, 0.01)

    def on_start(self):
        # Prepare a small random payload (adjust input_dim as per model)
        self.input_dim = 50
        self.batch = 32
        self.model_name = "autoencoder_demo"

    @task(3)
    def predict(self):
        data = np.random.randn(self.batch, self.input_dim).tolist()
        payload = {"data": data, "return_proba": False}
        self.client.post(
            f"/models/{self.model_name}/predict",
            data=json.dumps(payload),
            headers={"Content-Type": "application/json"},
        )

    @task(1)
    def health(self):
        self.client.get("/health")
```
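For unattended runs (e.g., in CI), Locust can also be driven as a library rather than via the CLI. A minimal sketch, assuming this file is importable as `locustfile` and following Locust's documented library mode:

```python
# Hypothetical headless driver for the user class above, using Locust's
# library mode; adjust host and user counts to your environment.
import gevent
from locust.env import Environment
from locust.stats import stats_printer

from locustfile import InferenceUser  # assumes this module is on sys.path

env = Environment(user_classes=[InferenceUser], host="http://localhost:8001")
runner = env.create_local_runner()
gevent.spawn(stats_printer(env.stats))  # periodic stats to stdout
runner.start(100, spawn_rate=10)        # 100 users, 10 spawned per second
gevent.spawn_later(60, runner.quit)     # stop after 60 s
runner.greenlet.join()
```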
services/shieldx-ml/validate_code.py

Lines changed: 139 additions & 0 deletions

```python
#!/usr/bin/env python3
"""
Quick validation script for ShieldX ML Service
Checks Python syntax and basic imports without running full tests
"""

import os
import sys
import py_compile
from pathlib import Path


class Colors:
    GREEN = '\033[0;32m'
    RED = '\033[0;31m'
    YELLOW = '\033[1;33m'
    BLUE = '\033[0;34m'
    BOLD = '\033[1m'
    NC = '\033[0m'


def print_header(text: str):
    print(f"\n{Colors.BOLD}{Colors.BLUE}{'='*60}{Colors.NC}")
    print(f"{Colors.BOLD}{Colors.BLUE}{text:^60}{Colors.NC}")
    print(f"{Colors.BOLD}{Colors.BLUE}{'='*60}{Colors.NC}\n")


def check_syntax(file_path: Path) -> bool:
    """Check Python file syntax"""
    try:
        py_compile.compile(str(file_path), doraise=True)
        return True
    except py_compile.PyCompileError as e:
        print(f"{Colors.RED}✗ Syntax Error:{Colors.NC} {file_path}")
        print(f"  {e}")
        return False


def check_imports(file_path: Path) -> bool:
    """Lightweight sanity check of import statements (no execution)"""
    try:
        with open(file_path) as f:
            content = f.read()

        if 'import' in content:
            lines = content.split('\n')
            for i, line in enumerate(lines, 1):
                stripped = line.strip()
                if stripped.startswith('import ') or stripped.startswith('from '):
                    if stripped.endswith('\\'):
                        continue  # Multi-line import
                    if 'import' in stripped and not any(x in stripped for x in ['(', ',', 'as']):
                        # Simple single-name import: expect at least "import name"
                        parts = stripped.split()
                        if len(parts) < 2:
                            print(f"{Colors.YELLOW}⚠ Warning:{Colors.NC} Line {i}: {stripped}")

        return True
    except Exception as e:
        print(f"{Colors.RED}✗ Import Check Failed:{Colors.NC} {file_path}")
        print(f"  {e}")
        return False


def validate_directory(directory: str, pattern: str = "**/*.py") -> tuple:
    """Validate all Python files in directory"""
    path = Path(directory)
    if not path.exists():
        print(f"{Colors.YELLOW}Directory not found: {directory}{Colors.NC}")
        return 0, 0

    print(f"\n{Colors.BOLD}Validating: {directory}{Colors.NC}")
    print("-" * 60)

    files = list(path.glob(pattern))
    if not files:
        print(f"{Colors.YELLOW}No Python files found{Colors.NC}")
        return 0, 0

    passed = 0
    failed = 0

    for file in sorted(files):
        # Skip __pycache__ and venv
        if '__pycache__' in str(file) or 'venv' in str(file):
            continue

        # Check syntax, then imports
        if check_syntax(file) and check_imports(file):
            print(f"{Colors.GREEN}✓{Colors.NC} {file.relative_to(path.parent)}")
            passed += 1
        else:
            failed += 1

    return passed, failed


def main():
    print_header("ShieldX ML Service - Quick Validation")

    # Run from the service root so the relative directory names below resolve
    os.chdir(Path(__file__).resolve().parent)
    print(f"Working directory: {os.getcwd()}\n")

    total_passed = 0
    total_failed = 0

    # Validate main ML service code
    directories = [
        'ml-service',
        'tests',
        'ml-service/tests'
    ]

    for directory in directories:
        if os.path.exists(directory):
            passed, failed = validate_directory(directory)
            total_passed += passed
            total_failed += failed

    # Summary
    print_header("Validation Summary")
    print(f"Total Files: {total_passed + total_failed}")
    print(f"{Colors.GREEN}Passed: {total_passed}{Colors.NC}")
    print(f"{Colors.RED}Failed: {total_failed}{Colors.NC}")

    if total_failed == 0:
        print(f"\n{Colors.GREEN}{Colors.BOLD}✓ ALL FILES VALID!{Colors.NC}")
        # Guidance for running full tests
        print(f"\n{Colors.YELLOW}Note: Full test execution requires installing test dependencies.{Colors.NC}")
        print(f"{Colors.YELLOW}Run: pip3 install -r requirements-test.txt{Colors.NC}")
        return 0
    else:
        print(f"\n{Colors.RED}{Colors.BOLD}✗ VALIDATION FAILED{Colors.NC}")
        return 1


if __name__ == '__main__':
    sys.exit(main())
```
