Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 48 additions & 31 deletions fastdeploy/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -378,9 +378,9 @@ def reset_config_value(key, value):
if not hasattr(self, key.lower()):
if os.getenv(key, None):
value = eval(os.getenv(key))
logger.info(f"Get parameter `{key}` = {value} from environment.")
logger.info("Get parameter `%s` = %s from environment.", key, value)
else:
logger.info(f"Parameter `{key}` will use default value {value}.")
logger.info("Parameter `%s` will use default value %s.", key, value)
setattr(self, key.lower(), value)

reset_config_value("COMPRESSION_RATIO", 1.0)
Expand Down Expand Up @@ -597,10 +597,10 @@ def print(self):
"""
Print all configuration information.
"""
logger.info("Model Configuration Information :")
logger.debug("Model Configuration Information:")
for k, v in self.__dict__.items():
logger.info("{:<20}:{:<6}{}".format(k, "", v))
logger.info("=============================================================")
logger.debug("{:<20}:{:<6}{}".format(k, "", v))
logger.debug("=============================================================")


class ParallelConfig:
Expand Down Expand Up @@ -679,7 +679,7 @@ def __init__(
and self.expert_parallel_size > 1
and self.tensor_parallel_size > 1
)
logger.info(f"use_sequence_parallel_moe: {self.use_sequence_parallel_moe}")
logger.debug("use_sequence_parallel_moe: %s", self.use_sequence_parallel_moe)

def set_communicate_group(self):
# different tp group id
Expand All @@ -700,18 +700,26 @@ def set_communicate_group(self):
self.ep_group = dist.new_group(range(self.expert_parallel_size))
dist.collective._set_custom_gid(None)
logger.info(
f"data_parallel_size: {self.data_parallel_size}, tensor_parallel_size: {self.tensor_parallel_size}, expert_parallel_size: {self.expert_parallel_size}, data_parallel_rank: {self.data_parallel_rank}, tensor_parallel_rank: {self.tensor_parallel_rank}, expert_parallel_rank: {self.expert_parallel_rank}, tp_group: {self.tp_group}."
"data_parallel_size: %d, tensor_parallel_size: %d, expert_parallel_size: %d, "
"data_parallel_rank: %d, tensor_parallel_rank: %d, expert_parallel_rank: %d, tp_group: %s",
self.data_parallel_size,
self.tensor_parallel_size,
self.expert_parallel_size,
self.data_parallel_rank,
self.tensor_parallel_rank,
self.expert_parallel_rank,
self.tp_group,
)

def print(self):
"""
print all config

"""
logger.info("Parallel Configuration Information :")
logger.debug("Parallel Configuration Information:")
for k, v in self.__dict__.items():
logger.info("{:<20}:{:<6}{}".format(k, "", v))
logger.info("=============================================================")
logger.debug("{:<20}:{:<6}{}".format(k, "", v))
logger.debug("=============================================================")


class SpeculativeConfig:
Expand Down Expand Up @@ -836,10 +844,10 @@ def print(self):
print all config

"""
logger.info("Speculative Decoding Configuration Information :")
logger.debug("Speculative Decoding Configuration Information:")
for k, v in self.__dict__.items():
logger.info("{:<20}:{:<6}{}".format(k, "", v))
logger.info("=============================================================")
logger.debug("{:<20}:{:<6}{}".format(k, "", v))
logger.debug("=============================================================")

def check_legality_parameters(
self,
Expand Down Expand Up @@ -1339,10 +1347,10 @@ def print(self):
"""
Print all configuration information.
"""
logger.info("EPLB Configuration Information :")
logger.debug("EPLB Configuration Information:")
for k, v in self.__dict__.items():
logger.info("{:<20}:{:<6}{}".format(k, "", v))
logger.info("=============================================================")
logger.debug("{:<20}:{:<6}{}".format(k, "", v))
logger.debug("=============================================================")


class CacheConfig:
Expand Down Expand Up @@ -1490,7 +1498,7 @@ def postprocess(self, num_total_tokens, number_of_tasks):
block_num = (length + self.block_size - 1 + self.dec_token_num) // self.block_size
self.total_block_num = block_num * number_of_tasks
self.prefill_kvcache_block_num = self.total_block_num
logger.info(f"Doing profile, the total_block_num:{self.total_block_num}")
logger.info("Doing profile, the total_block_num: %d", self.total_block_num)

def reset(self, num_gpu_blocks):
"""
Expand All @@ -1516,10 +1524,10 @@ def print(self):
print all config

"""
logger.info("Cache Configuration Information :")
logger.debug("Cache Configuration Information:")
for k, v in self.__dict__.items():
logger.info("{:<20}:{:<6}{}".format(k, "", v))
logger.info("=============================================================")
logger.debug("{:<20}:{:<6}{}".format(k, "", v))
logger.debug("=============================================================")


class RouterConfig:
Expand Down Expand Up @@ -1586,19 +1594,19 @@ def _load_from_version_file(self, file_path: str = None):
elif line.startswith("CXX compiler version:"):
self.compiler_version = line.split(":")[1].strip()
except FileNotFoundError:
logger.info(f"Warning: Version file not found at {file_path}")
logger.warning("Version file not found at %s", file_path)
except Exception as e:
logger.info(f"Warning: Could not read version file - {e!s}")
logger.warning("Could not read version file: %s", e)

def print(self):
"""
print all config

"""
logger.info("Fasedeploy Commit Information :")
logger.debug("FastDeploy Commit Information:")
for k, v in self.__dict__.items():
logger.info("{:<20}:{:<6}{}".format(k, "", v))
logger.info("=============================================================")
logger.debug("{:<20}:{:<6}{}".format(k, "", v))
logger.debug("=============================================================")


class StructuredOutputsConfig:
Expand Down Expand Up @@ -2121,11 +2129,20 @@ def print(self):
"""
print all config
"""
logger.info("=================== Configuration Information ===============")
logger.info(
"Configuration: model=%s, tp=%d, max_batch=%d, max_seq_len=%d, dtype=%s, device=%s",
self.model_config.model,
self.parallel_config.tensor_parallel_size,
self.scheduler_config.max_num_seqs,
self.model_config.max_model_len,
self.model_config.dtype,
self.parallel_config.device_ids,
)
logger.debug("=================== Configuration Information ===============")
for k, v in self.__dict__.items():
if k == "generation_config" and v is not None:
for gck, gcv in v.to_dict().items():
logger.info("{:<20}:{:<6}{}".format(gck, "", gcv))
logger.debug("{:<20}:{:<6}{}".format(gck, "", gcv))
elif (
k == "cache_config"
or k == "model_config"
Expand All @@ -2136,8 +2153,8 @@ def print(self):
if v is not None:
v.print()
else:
logger.info("{:<20}:{:<6}{}".format(k, "", v))
logger.info("=============================================================")
logger.debug("{:<20}:{:<6}{}".format(k, "", v))
logger.debug("=============================================================")

def init_cache_info(self):
"""
Expand Down Expand Up @@ -2171,7 +2188,7 @@ def init_cache_info(self):
"transfer_protocol": transfer_protocol,
"tp_size": self.parallel_config.tensor_parallel_size,
}
logger.info(f"register_info: {self.register_info}")
logger.debug("register_info: %s", self.register_info)

def read_from_config(self):
"""
Expand All @@ -2182,7 +2199,7 @@ def reset_value(cls, value_name, key):
if hasattr(cls, key):
value = getattr(cls, key)
setattr(cls, value_name, value)
logger.info(f"Reset parameter {value_name} = {value} from configuration.")
logger.info("Reset parameter %s = %s from configuration.", value_name, value)

reset_value(self.cache_config, "block_size", "infer_model_block_size")
reset_value(
Expand Down
Loading
Loading