@@ -48,56 +48,6 @@ XllmHttpServiceImpl::XllmHttpServiceImpl(const HttpServiceConfig& config)
4848
// Destructor with an empty body: no explicit cleanup is performed here.
4949XllmHttpServiceImpl::~XllmHttpServiceImpl () {}
5050
51- bool XllmHttpServiceImpl::create_channel (const std::string& target_uri) {
52- std::lock_guard<std::mutex> guard (channel_mutex_);
53- if (cached_channels_.find (target_uri) == cached_channels_.end ()) {
54- brpc::Channel* channel = new brpc::Channel ();
55- brpc::ChannelOptions options;
56- // Add to params
57- options.protocol = " http" ;
58- options.timeout_ms = config_.timeout_ms ; /* milliseconds*/
59- options.max_retry = 3 ;
60- std::string load_balancer = " " ;
61- if (channel->Init (target_uri.c_str (), load_balancer.c_str (), &options) !=
62- 0 ) {
63- LOG (ERROR) << " Fail to initialize channel for " << target_uri;
64- return false ;
65- }
66- cached_channels_[target_uri] = channel;
67- }
68-
69- return true ;
70- }
71-
72- std::string XllmHttpServiceImpl::get_redirect_uri (bool only_prefill) {
73- std::string target_instance_addr;
74- if (!rpc_service_) {
75- // for testing
76- if (config_.test_instance_addr .empty ()) {
77- LOG (ERROR) << " Rpc service is not start." ;
78- return " " ;
79- }
80- target_instance_addr = config_.test_instance_addr ;
81- } else {
82- InstancesPair instances_pair =
83- rpc_service_->select_instances_pair (only_prefill);
84- if (instances_pair.prefill_instance_http_addr .empty ()) {
85- LOG (ERROR) << " No prefill instance available." ;
86- return " " ;
87- }
88- target_instance_addr = instances_pair.prefill_instance_http_addr ;
89-
90- if (!only_prefill) {
91- if (instances_pair.decode_instance_http_addr .empty ()) {
92- // TODO:
93- }
94- // TODO: add instances_pair.decode_instance_http_addr to request?
95- }
96- }
97-
98- return target_instance_addr;
99- }
100-
10151void XllmHttpServiceImpl::Hello (::google::protobuf::RpcController* controller,
10252 const proto::HttpHelloRequest* request,
10353 proto::HttpHelloResponse* response,
@@ -198,7 +148,8 @@ void XllmHttpServiceImpl::handle(std::shared_ptr<T> call_data,
198148
199149 // async redistribute the request and wait the response
200150 // TODO: optimize the thread pool to async mode.
201- auto channel_ptr = cached_channels_[target_uri];
151+ brpc::Channel* channel_ptr = rpc_service_->get_channel (target_uri).get ();
152+
202153 // send request to prefill instance.
203154 thread_pool_->schedule ([this ,
204155 service_request_id,
@@ -360,24 +311,6 @@ void XllmHttpServiceImpl::post_serving(
360311 // create xllm_service request_id: service_request_id
361312 std::string service_request_id = generate_service_request_id (serving_method);
362313 json_value[" service_request_id" ] = service_request_id;
363- std::string req_attachment = json_value.dump ();
364- request_tracer_->log (service_request_id, req_attachment);
365-
366- // redistribute the request to the correct P/D instance
367- // TODO: redistribute policy to select the instance
368- std::string target_uri = get_redirect_uri ();
369- if (target_uri.empty ()) {
370- cntl->SetFailed (
371- " Internal runtime error, can not found a running instance." );
372- return ;
373- }
374- if (cached_channels_.find (target_uri) == cached_channels_.end ()) {
375- if (!create_channel (target_uri)) {
376- LOG (ERROR) << " Create channel failed, target_uri is " << target_uri;
377- cntl->SetFailed (" Internal runtime error." );
378- return ;
379- }
380- }
381314
382315 std::function<void (const std::string&)> trace_callback;
383316 if (config_.enable_request_trace ) {
@@ -388,33 +321,82 @@ void XllmHttpServiceImpl::post_serving(
388321 trace_callback = nullptr ;
389322 }
390323
324+ SchduleResult schedule_res;
391325 if (serving_method == " /v1/completions" ) {
326+ if (json_value.contains (" prompt" )) {
327+ if (!rpc_service_->schedule (json_value.at (" prompt" ).get <std::string>(),
328+ &schedule_res)) {
329+ cntl->SetFailed (" Schedule fail!" );
330+ LOG (ERROR) << " XllmRpcServiceImpl::schedule error!" ;
331+ return ;
332+ }
333+ } else {
334+ cntl->SetFailed (" Input has no prompt!" );
335+ LOG (ERROR) << " Input has no prompt!" ;
336+ return ;
337+ }
338+ json_value[" token_ids" ] = schedule_res.token_ids ;
339+ json_value[" routing" ] = schedule_res.routing .serialize_to_json ();
340+
341+ std::string req_attachment = json_value.dump ();
392342 auto arena = response->GetArena ();
393343 auto resp_pb =
394344 google::protobuf::Arena::CreateMessage<llm::proto::CompletionResponse>(
395345 arena);
396346 auto call_data = std::make_shared<CompletionCallData>(
397- cntl, stream, done_guard.release (), resp_pb, trace_callback );
347+ cntl, stream, done_guard.release (), resp_pb);
398348 handle_v1_completions (call_data,
399349 req_attachment,
400350 service_request_id,
401351 stream,
402352 model,
403353 include_usage,
404- target_uri );
354+ schedule_res. routing . prefill_name );
405355 } else if (serving_method == " /v1/chat/completions" ) {
356+ if (json_value.contains (" messages" ) && json_value[" messages" ].is_array ()) {
357+ ChatMessages messages;
358+ try {
359+ const auto & msgs = json_value[" messages" ];
360+ messages.reserve (msgs.size ());
361+ for (const auto & msg : msgs) {
362+ if (msg.contains (" role" ) && msg[" role" ].is_string () &&
363+ msg.contains (" content" ) && msg[" content" ].is_string ()) {
364+ messages.emplace_back (msg[" role" ].get <std::string>(),
365+ msg[" content" ].get <std::string>());
366+ }
367+ }
368+ } catch (const nlohmann::json::exception& e) {
369+ cntl->SetFailed (" Parse request fail, Invalid messages!" );
370+ LOG (ERROR) << " Parse request fail, Invalid messages!" ;
371+ return ;
372+ }
373+
374+ if (!rpc_service_->schedule (messages, &schedule_res)) {
375+ cntl->SetFailed (" Schedule fail!" );
376+ LOG (ERROR) << " XllmRpcServiceImpl::schedule error!" ;
377+ return ;
378+ }
379+ } else {
380+ cntl->SetFailed (" Input has no messages!" );
381+ LOG (ERROR) << " Input has no messages!" ;
382+ return ;
383+ }
384+ json_value[" token_ids" ] = schedule_res.token_ids ;
385+ json_value[" routing" ] = schedule_res.routing .serialize_to_json ();
386+
387+ std::string req_attachment = json_value.dump ();
406388 auto arena = response->GetArena ();
407389 auto resp_pb =
408390 google::protobuf::Arena::CreateMessage<llm::proto::ChatResponse>(arena);
409391 auto call_data = std::make_shared<ChatCallData>(
410- cntl, stream, done_guard.release (), resp_pb, trace_callback );
392+ cntl, stream, done_guard.release (), resp_pb);
411393 handle_v1_chat_completions (call_data,
412394 req_attachment,
413395 service_request_id,
414396 stream,
415397 model,
416398 include_usage,
417- target_uri );
399+ schedule_res. routing . prefill_name );
418400 } else {
419401 LOG (ERROR) << " Not supported method: " << serving_method;
420402 cntl->SetFailed (" Not supported method: " + serving_method);
@@ -456,22 +438,18 @@ void XllmHttpServiceImpl::get_serving(
456438 // done_guard.release());
457439 auto call_data = std::make_shared<CompletionCallData>(
458440 cntl, false , done_guard.release (), nullptr );
459- std::string target_uri = get_redirect_uri (true /* only_prefill*/ );
460- if (target_uri.empty ()) {
461- cntl->SetFailed (
462- " Internal runtime error, can not found a running instance." );
441+
442+ SchduleResult schedule_res;
443+ if (!rpc_service_->schedule (" " , &schedule_res)) {
444+ cntl->SetFailed (" Schedule fail!" );
445+ LOG (ERROR) << " XllmRpcServiceImpl::schedule error!" ;
463446 return ;
464447 }
465- if (cached_channels_.find (target_uri) == cached_channels_.end ()) {
466- if (!create_channel (target_uri)) {
467- LOG (ERROR) << " Create channel failed, target_uri is " << target_uri;
468- cntl->SetFailed (" Internal runtime error." );
469- return ;
470- }
471- }
472448
473- auto channel_ptr = cached_channels_[target_uri];
474- target_uri += serving_method;
449+ brpc::Channel* channel_ptr =
450+ rpc_service_->get_channel (schedule_res.routing .prefill_name ).get ();
451+ std::string target_uri = schedule_res.routing .prefill_name + serving_method;
452+
475453 thread_pool_->schedule (
476454 [/* req_attachment, */ call_data, cntl, channel_ptr, target_uri]() {
477455 brpc::Controller* redirect_cntl = new brpc::Controller ();
0 commit comments