From a219563ce8bf2f919d70abbb09396979df5a6283 Mon Sep 17 00:00:00 2001 From: Amitabha Banerjee Date: Tue, 3 Jun 2025 12:28:29 -0700 Subject: [PATCH 1/2] Adding support for HTTP connection pooling --- llm_bench/load_test.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llm_bench/load_test.py b/llm_bench/load_test.py index 0b19e06..17e34f7 100644 --- a/llm_bench/load_test.py +++ b/llm_bench/load_test.py @@ -9,6 +9,7 @@ import traceback from typing import Optional from locust import HttpUser, task, events, constant_pacing +from urllib3 import PoolManager import copy import json import time @@ -519,6 +520,7 @@ def _load_curl_like_data(text): class LLMUser(HttpUser): # no wait time, so every user creates a continuous load, sending requests as quickly as possible + pool_manager = PoolManager(maxsize=50, block=True) def on_start(self): try: From 8fdefa378116e1c7d36ab0cdeb5ecf32e66c8c27 Mon Sep 17 00:00:00 2001 From: Amitabha Banerjee Date: Tue, 3 Jun 2025 16:29:25 -0700 Subject: [PATCH 2/2] Adding debugging for urllib3 --- llm_bench/load_test.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/llm_bench/load_test.py b/llm_bench/load_test.py index 17e34f7..f8c7746 100644 --- a/llm_bench/load_test.py +++ b/llm_bench/load_test.py @@ -988,6 +988,15 @@ def init_parser(parser): type=int, help="How many sequences to generate (makes sense to use with non-zero temperature).", ) + #DEBUG + import logging + from http.client import HTTPConnection + logging.basicConfig() + logging.getLogger().setLevel(logging.DEBUG) + requests_log = logging.getLogger("requests.packages.urllib3") + requests_log.setLevel(logging.DEBUG) + requests_log.propagate = True + #DEBUG @events.quitting.add_listener def _(environment, **kw):