diff --git a/QEfficient/transformers/models/modeling_auto.py b/QEfficient/transformers/models/modeling_auto.py
index 6bff10f5a..2f3ee3dc0 100644
--- a/QEfficient/transformers/models/modeling_auto.py
+++ b/QEfficient/transformers/models/modeling_auto.py
@@ -1415,10 +1415,9 @@ def __init__(
         self.num_layers = model.config.num_hidden_layers
         self.continuous_batching = continuous_batching
         self.model.qaic_config = qaic_config
-
+        self.pretrained_model_name_or_path = kwargs.get("pretrained_model_name_or_path", None)
         self.model, transformed = SpDTransform.apply(self.model, qaic_config, **kwargs)
         self.is_tlm = transformed
-        self.pretrained_model_name_or_path = kwargs.get("pretrained_model_name_or_path", None)
 
         # ---Sampling---
         # Note: SamplerTransform should be applied after all other transforms
diff --git a/QEfficient/transformers/models/pytorch_transforms.py b/QEfficient/transformers/models/pytorch_transforms.py
index 42807753d..ca74c0ddd 100644
--- a/QEfficient/transformers/models/pytorch_transforms.py
+++ b/QEfficient/transformers/models/pytorch_transforms.py
@@ -503,6 +503,7 @@ class SpDTransform:
     @classmethod
     def apply(cls, model: nn.Module, qaic_config: Optional[dict] = None, **kwargs) -> Tuple[nn.Module, bool]:
         transformed = False
+        pretrained_model_name_or_path_temp = kwargs.pop("pretrained_model_name_or_path", None)
         if qaic_config is None or (speculative_model_type := qaic_config.get("speculative_model_type")) is None:
             return model, transformed
         elif speculative_model_type not in (
@@ -524,6 +525,7 @@ def apply(cls, model: nn.Module, qaic_config: Optional[dict] = None, **kwargs) -
             raise NotImplementedError(
                 f"model class {model_class} does not yet support returning multiple logits to keep."
             )
+        kwargs["pretrained_model_name_or_path"] = pretrained_model_name_or_path_temp
         return model, transformed
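
For context, here is a minimal, self-contained sketch of the pop/restore pattern this change introduces in SpDTransform.apply. All names below (init_speculative_head, load_weights, apply, hidden_size, "org/model") are hypothetical stand-ins, not QEfficient APIs: the point is that popping pretrained_model_name_or_path keeps a forwarded **kwargs clean for a callee that would reject the key, while restoring it afterwards keeps it available to later consumers in the same scope.

```python
from typing import Optional


def init_speculative_head(**kwargs) -> None:
    # Stand-in for a downstream call that forwards **kwargs and raises
    # TypeError on keys it does not recognize.
    if "pretrained_model_name_or_path" in kwargs:
        raise TypeError("unexpected keyword argument 'pretrained_model_name_or_path'")


def load_weights(pretrained_model_name_or_path: Optional[str] = None, **kwargs) -> None:
    # Stand-in for a later consumer that does expect the key.
    print(f"loading weights from {pretrained_model_name_or_path}")


def apply(**kwargs) -> None:
    # Pop the key so init_speculative_head never sees it...
    pretrained_model_name_or_path_temp = kwargs.pop("pretrained_model_name_or_path", None)
    init_speculative_head(**kwargs)
    # ...then restore it so later uses of kwargs in this scope still work.
    kwargs["pretrained_model_name_or_path"] = pretrained_model_name_or_path_temp
    load_weights(**kwargs)


apply(pretrained_model_name_or_path="org/model", hidden_size=128)
```

Note that because **kwargs builds a fresh dict in the callee, the pop inside apply never mutates the caller's dictionary; the restore only matters for code later in apply itself, which is why modeling_auto.py can also safely read the key via kwargs.get before calling SpDTransform.apply.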