Skip to content

Commit 418863b

Browse files
committed
added more tests
1 parent 612a3be commit 418863b

File tree

1 file changed

+238
-0
lines changed

1 file changed

+238
-0
lines changed

.github/workflows/test-mcp-server.yml

Lines changed: 238 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -355,3 +355,241 @@ jobs:
355355
run: |
356356
echo "Checking Docker logs for troubleshooting..."
357357
docker compose logs || echo "Failed to get logs"
358+
test-with-external-llm:
  # Job that runs the API tests against the external LLM node configured in
  # `env` below.
  needs: test  # start only after the local test job has completed
  runs-on: ubuntu-latest

  env:
    LLM_API_BASE: 'https://0x9fcf7888963793472bfcb8c14f4b6b47a7462f17.gaia.domains/v1'
    LLM_MODEL: 'Qwen2.5-Coder-3B-Instruct'
    LLM_EMBED_MODEL: 'gte-Qwen2-1.5B-instruct'
    LLM_EMBED_SIZE: '1536'
    LLM_API_KEY: ''  # this public node does not require an API key
    SKIP_VECTOR_SEARCH: 'true'

  services:
    # Qdrant service container, published on the runner's ports 6333 and 6334.
    qdrant:
      image: qdrant/qdrant:latest
      ports:
        - '6333:6333'
        - '6334:6334'

  steps:
- name: Checkout code
  # NOTE(review): the action ref in the scraped page was replaced by an
  # e-mail-obfuscation placeholder ("[email protected]"), so the original
  # version tag is unknown. v4 is assumed here - confirm against the
  # repository before merging.
  uses: actions/checkout@v4
381+
- name: Install Python and dependencies
  # Provisions the Python interpreter used by the later pip/uvicorn steps.
  uses: actions/setup-python@v4
  with:
    # Kept as a quoted string so YAML does not parse it as the float 3.1.
    python-version: "3.10"
386+
- name: Install jq and curl
  # jq and curl are used by every test step below to call the API and
  # inspect its JSON replies.
  run: |
    # Refresh the package index first: installing against a stale index can
    # fail with 404s once the mirror has rotated package versions.
    sudo apt-get update
    sudo apt-get install -y jq curl
389+
- name: Install Python dependencies
  # Installs the packages listed in the repository's requirements.txt.
  run: |
    pip install -r requirements.txt
392+
- name: Create test directories
  # Creates the output and example-data directories; -p makes this a no-op
  # for directories that already exist and creates missing parents.
  run: |
    mkdir -p output data/project_examples data/error_examples
398+
- name: Start API server with external LLM
  # Launches uvicorn in the background and waits until it answers HTTP
  # requests. A fixed sleep is either too short on a slow runner (flaky) or
  # wastes time on a fast one, so readiness is polled instead.
  run: |
    echo "Starting API server with external Gaia node..."
    # Server output goes to uvicorn.log so it can be shown if startup fails.
    python -m uvicorn app.main:app --host 0.0.0.0 --port 8000 > uvicorn.log 2>&1 &

    # Poll for up to ~60 seconds. curl exits 0 as soon as any HTTP response
    # is received; the status code itself is checked by the next step.
    for attempt in {1..30}; do
      if curl -s -o /dev/null http://localhost:8000/docs; then
        echo "API server answered after $attempt attempt(s)"
        exit 0
      fi
      sleep 2
    done

    echo "API server did not answer within 60 seconds. Server log:"
    cat uvicorn.log || true
    exit 1
404+
- name: Verify API server is running
  # Fails the job unless GET /docs returns HTTP 200.
  run: |
    # When the connection fails, -w already prints "000" as the status code.
    # Only the non-zero exit status is suppressed here; echoing a marker as
    # well would produce the confusing value "000CURL_FAILED".
    HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8000/docs || true)
    if [[ "$HTTP_CODE" != "200" ]]; then
      echo "API server is not running properly. Status code: $HTTP_CODE"
      # pgrep does not match itself the way "ps aux | grep" does, so an empty
      # result really means the server process is gone.
      pgrep -af uvicorn || echo "No uvicorn process found"
      exit 1
    fi
    echo "API server is running correctly"
414+
- name: Test /compile endpoint with external LLM
  # Posts a minimal Cargo project to /compile and fails the step unless the
  # JSON reply has success == true.
  run: |
    # Pretty-print the reply with jq, falling back to the raw text when jq
    # cannot parse it.
    show_response() {
      echo "$RESPONSE" | jq || echo "$RESPONSE"
    }

    echo "Testing /compile endpoint..."
    # -f makes curl exit non-zero on HTTP errors too; in that case the
    # sentinel is appended to whatever curl printed.
    RESPONSE=$(curl -s -S -f -X POST http://localhost:8000/compile \
      -H "Content-Type: application/json" \
      -d '{
        "code": "[filename: Cargo.toml]\n[package]\nname = \"hello_world\"\nversion = \"0.1.0\"\nedition = \"2021\"\n\n[dependencies]\n\n[filename: src/main.rs]\nfn main() {\n println!(\"Hello, World!\");\n}"
      }' || echo "CURL_FAILED")

    if [[ "$RESPONSE" == "CURL_FAILED" ]]; then
      echo "Failed to connect to API service"
      exit 1
    fi

    # jq -e turns a false/null result into a non-zero exit status.
    if echo "$RESPONSE" | jq -e '.success == true' > /dev/null; then
      echo "Compilation successful with external LLM!"
      show_response
    else
      echo "Compilation failed:"
      show_response
      exit 1
    fi
438+
- name: Test /compile-and-fix endpoint with external LLM
  # Posts a project with a deliberate syntax error to /compile-and-fix.
  # Only a curl failure fails the step; both reply shapes are accepted.
  run: |
    echo "Testing /compile-and-fix endpoint..."
    RESPONSE=$(curl -s -S -f -X POST http://localhost:8000/compile-and-fix \
      -H "Content-Type: application/json" \
      -d '{
        "code": "[filename: Cargo.toml]\n[package]\nname = \"hello_world\"\nversion = \"0.1.0\"\nedition = \"2021\"\n\n[dependencies]\n\n[filename: src/main.rs]\nfn main() {\n println!(\"Hello, World!\" // Missing closing parenthesis\n}",
        "description": "A simple hello world program",
        "max_attempts": 3
      }' || echo "CURL_FAILED")

    if [[ "$RESPONSE" == "CURL_FAILED" ]]; then
      echo "Failed to connect to API service"
      exit 1
    fi

    # Keep the complete reply on disk for debugging.
    echo "$RESPONSE" > compile_fix_response.txt

    # A reply that starts with "{" is treated as JSON; anything else is
    # treated as the plain-text fixed code.
    case "$RESPONSE" in
      '{'*)
        echo "Got JSON response (may be expected with external LLM):"
        echo "$RESPONSE" | jq || echo "$RESPONSE"
        ;;
      *)
        echo "Compile and fix successful with external LLM! Got text response with fixed code."
        ;;
    esac
467+
- name: Test /generate-sync with external LLM
  id: test-generate-sync-external
  continue-on-error: true
  # Calls /generate-sync once, saves the reply to external_generate_output.txt
  # and records one of status=success|fallback|error in the step outputs.
  run: |
    echo "Testing /generate-sync endpoint with external LLM..."

    REQUEST_BODY='{"description": "A simple command-line calculator in Rust", "requirements": "Should support addition, subtraction, multiplication, and division"}'
    BODY_FILE=$(mktemp)

    # One request only: the body is written to BODY_FILE and the status code
    # comes from -w. Issuing a second request just to read the code would
    # double the generation work and report a code that may not belong to
    # the saved body. "|| true" keeps the -e shell alive when the connection
    # fails; -w still prints "000" in that case.
    HTTP_CODE=$(curl -s -S -o "$BODY_FILE" -w "%{http_code}" \
      -X POST http://localhost:8000/generate-sync \
      -H "Content-Type: application/json" \
      -d "$REQUEST_BODY" || true)
    RESPONSE=$(cat "$BODY_FILE")
    rm -f "$BODY_FILE"

    echo "HTTP response code: $HTTP_CODE"

    if [[ "$HTTP_CODE" != "200" ]]; then
      echo "External LLM generation failed with code $HTTP_CODE"
      echo "Response: $RESPONSE"
      echo "status=error" >> "$GITHUB_OUTPUT"
      exit 1
    fi

    # Save response to file for the follow-up /compile step.
    echo "$RESPONSE" > external_generate_output.txt

    # Verify the response format has filename markers.
    if ! echo "$RESPONSE" | grep -q "\[filename:"; then
      echo "Response does not contain filename markers:"
      echo "$RESPONSE" | head -20
      echo "status=error" >> "$GITHUB_OUTPUT"
      exit 1
    fi

    # A fallback template still counts as a passing step, but is reported
    # through a distinct status value.
    if echo "$RESPONSE" | grep -q "THIS IS A FALLBACK TEMPLATE - LLM generation failed"; then
      echo "WARNING: Response contains fallback template - external LLM generation failed"
      echo "status=fallback" >> "$GITHUB_OUTPUT"
    else
      echo "External LLM generate-sync successful! Response contains code files in text format."
      echo "status=success" >> "$GITHUB_OUTPUT"
    fi
509+
- name: "Test workflow with external LLM: /generate-sync → /compile"
  # Runs only when the /generate-sync step itself succeeded; feeds its saved
  # output into /compile and requires success == true in the reply.
  if: steps.test-generate-sync-external.outcome == 'success'
  run: |
    # Pretty-print the compile reply with jq, falling back to the raw text.
    show_compile_response() {
      echo "$COMPILE_RESPONSE" | jq || echo "$COMPILE_RESPONSE"
    }

    echo "Testing workflow with external LLM: /generate-sync → /compile..."

    # Warn, but carry on, when the saved output is the fallback template.
    if grep -q "FALLBACK TEMPLATE" external_generate_output.txt; then
      echo "WARNING: Testing with fallback template code - external LLM generation failed but continuing with tests"
    fi

    # Drop everything from the first line starting with "# Build" onwards
    # (the build status comment).
    GENERATE_OUTPUT=$(sed '/^# Build/,$d' external_generate_output.txt)

    # jq -Rs reads the whole text as one raw string and emits it JSON-escaped
    # as the value of "code".
    COMPILE_RESPONSE=$(curl -s -S -f -X POST http://localhost:8000/compile \
      -H "Content-Type: application/json" \
      -d "$(echo "$GENERATE_OUTPUT" | jq -Rs '{code: .}')" || echo "CURL_FAILED")

    if [[ "$COMPILE_RESPONSE" == "CURL_FAILED" ]]; then
      echo "Failed to connect to API service"
      exit 1
    fi

    if echo "$COMPILE_RESPONSE" | jq -e '.success == true' > /dev/null; then
      echo "External LLM workflow test successful! Generated code compiles correctly."
      show_compile_response
    else
      echo "Compilation failed:"
      show_compile_response
      exit 1
    fi
544+
- name: Test /generate endpoint with external LLM
  continue-on-error: true
  # Starts a project generation via /generate, then polls /project/<id>
  # until the status is "completed" or "failed", or the attempts run out.
  # Running out of attempts is tolerated and does not fail the step.
  run: |
    echo "Testing /generate endpoint with external LLM..."

    # Generate the project
    RESPONSE=$(curl -s -S -f -X POST http://localhost:8000/generate \
      -H "Content-Type: application/json" \
      -d '{
        "description": "A Rust program that converts between different units of measurement",
        "requirements": "Support length (m, cm, inch, feet), weight (kg, g, lb), and temperature (C, F, K)"
      }' || echo "CURL_FAILED")

    if [ "$RESPONSE" = "CURL_FAILED" ]; then
      echo "Failed to connect to API service"
      exit 1
    fi

    # Extract project_id from response. "// empty" maps a missing/null field
    # to an empty string, and a jq parse error is treated the same way, so
    # the loop below never polls /project/null.
    PROJECT_ID=$(echo "$RESPONSE" | jq -r '.project_id // empty') || PROJECT_ID=""
    if [ -z "$PROJECT_ID" ]; then
      echo "Response did not contain a project_id:"
      echo "$RESPONSE"
      exit 1
    fi
    echo "Project ID: $PROJECT_ID"

    # Poll for project completion (maximum 6 attempts, 20 seconds apart)
    MAX_ATTEMPTS=6
    echo "Polling for project completion..."
    for ((i = 1; i <= MAX_ATTEMPTS; i++)); do
      echo "Checking project status (attempt $i)..."
      STATUS_RESPONSE=$(curl -s -S -f "http://localhost:8000/project/$PROJECT_ID" || echo "CURL_FAILED")

      if [ "$STATUS_RESPONSE" = "CURL_FAILED" ]; then
        # Fall through to the shared wait below; retrying immediately would
        # use up the remaining attempts with no delay between them.
        echo "Failed to get project status"
      else
        STATUS=$(echo "$STATUS_RESPONSE" | jq -r '.status')
        echo "Current status: $STATUS"

        if [ "$STATUS" = "completed" ] || [ "$STATUS" = "failed" ]; then
          echo "Project generation finished with status: $STATUS"
          echo "$STATUS_RESPONSE" | jq
          break
        fi
      fi

      # Out of attempts: report it, but do not fail the step.
      if ((i == MAX_ATTEMPTS)); then
        echo "Project generation taking too long, but this is acceptable for testing"
        break
      fi

      echo "Waiting 20 seconds before next check..."
      sleep 20
    done

0 commit comments

Comments
 (0)