@@ -401,9 +401,9 @@ def get_size_or_none(x: Optional[torch.Tensor]):
401401 broadcast (input_metadata .block_tables , src = 0 )
402402 broadcast (sampling_metadata .selected_token_indices , src = 0 )
403403 else :
404- receving_list = [None ]
405- broadcast_object_list (receving_list , src = 0 )
406- py_data = receving_list [0 ]
404+ receiving_list = [None ]
405+ broadcast_object_list (receiving_list , src = 0 )
406+ py_data = receiving_list [0 ]
407407 input_tokens = torch .empty (
408408 * py_data ["input_tokens_size" ], dtype = torch .long , device = "cuda"
409409 )
@@ -505,9 +505,9 @@ def execute_model(
505505 model_executable = self .model
506506
507507 infer_text = sampling_metadata .seq_groups [0 ][1 ].infer_text
508- temperture = sampling_metadata .seq_groups [0 ][1 ].temperature
508+ temperature = sampling_metadata .seq_groups [0 ][1 ].temperature
509509 if not infer_text :
510- temperture = torch .tensor (temperture ).to (input_tokens .device )
510+ temperature = torch .tensor (temperature ).to (input_tokens .device )
511511 logits_processors , logits_warpers = sampling_metadata .seq_groups [0 ][
512512 1
513513 ].logits_processors
@@ -553,7 +553,7 @@ def execute_model(
553553 ),
554554 hidden_states = hidden_states ,
555555 infer_text = infer_text ,
556- temperature = temperture ,
556+ temperature = temperature ,
557557 logits_processors = logits_processors ,
558558 logits_warpers = logits_warpers ,
559559 min_new_token = min_new_token ,
0 commit comments