|
7 | 7 | import unittest |
8 | 8 |
|
9 | 9 | import requests |
| 10 | +import time |
10 | 11 |
|
11 | 12 | from mlx_lm.server import APIHandler |
12 | 13 | from mlx_lm.utils import load |
@@ -69,6 +70,55 @@ def tearDownClass(cls): |
69 | 70 | cls.httpd.server_close() |
70 | 71 | cls.server_thread.join() |
71 | 72 |
|
def test_handle_chunked_request(self):
    """Send a chat-completion request whose body is uploaded in HTTP chunks.

    The JSON payload is split into three byte fragments and streamed with
    ``Transfer-Encoding: chunked``. The test passes when the server answers
    200 with a JSON body containing ``id``, ``choices`` and ``usage``, and
    reports at least one completion token.
    """
    url = f"http://localhost:{self.port}/v1/chat/completions"

    # The fragments are cut in the middle of a JSON string value so that no
    # single chunk is a parseable document on its own.
    data_parts = [
        b'{"model": "default_model", "messages": [{"role": "user", "content": "Once',
        b' upon a times, Once upon ',
        b'a time"}], "temperature": 0.8, "max_tokens": 1024, "stream": false}',
    ]

    def data_generator():
        # Pause between fragments so they are written to the socket as
        # separate chunks rather than in one burst.
        for part in data_parts:
            yield part
            time.sleep(0.1)

    # NOTE(review): no timeout is passed, so a server that never finishes
    # reading the chunked body would hang this test -- consider adding one.
    try:
        # A generator body has no known length, so it is sent chunk-encoded;
        # the header is kept explicit to document the intent of the test.
        response = requests.post(
            url,
            data=data_generator(),
            headers={
                "Transfer-Encoding": "chunked",
                "Content-Type": "application/json",
            },
        )
    except requests.exceptions.RequestException as exc:
        # Report a test failure (not an error) and keep the root cause.
        self.fail(f"Chunked request failed: {exc}")

    self.assertEqual(response.status_code, 200)

    response_body = json.loads(response.text)
    self.assertIn("id", response_body)
    self.assertIn("choices", response_body)
    self.assertIn("usage", response_body)

    # Check that tokens were generated
    self.assertGreater(response_body["usage"]["completion_tokens"], 0)
| 121 | + |
72 | 122 | def test_handle_completions(self): |
73 | 123 | url = f"http://localhost:{self.port}/v1/completions" |
74 | 124 |
|
|
0 commit comments