From 1d243eca880e17e3e4814c142601dbd9bd9b31a8 Mon Sep 17 00:00:00 2001 From: Dipankar Sarkar Date: Fri, 20 Jun 2025 06:49:08 +0000 Subject: [PATCH 1/5] Bug fix for spdTransform Signed-off-by: Dipankar Sarkar --- QEfficient/transformers/models/pytorch_transforms.py | 1 + 1 file changed, 1 insertion(+) diff --git a/QEfficient/transformers/models/pytorch_transforms.py b/QEfficient/transformers/models/pytorch_transforms.py index 42807753d..1439738c0 100644 --- a/QEfficient/transformers/models/pytorch_transforms.py +++ b/QEfficient/transformers/models/pytorch_transforms.py @@ -503,6 +503,7 @@ class SpDTransform: @classmethod def apply(cls, model: nn.Module, qaic_config: Optional[dict] = None, **kwargs) -> Tuple[nn.Module, bool]: transformed = False + kwargs.pop("pretrained_model_name_or_path") if qaic_config is None or (speculative_model_type := qaic_config.get("speculative_model_type")) is None: return model, transformed elif speculative_model_type not in ( From e3d48c69f72f89865800c47bc09efe1a1b8dbb62 Mon Sep 17 00:00:00 2001 From: Dipankar Sarkar Date: Fri, 20 Jun 2025 08:14:47 +0000 Subject: [PATCH 2/5] ruff format fixed Signed-off-by: Dipankar Sarkar --- QEfficient/transformers/models/pytorch_transforms.py | 1 + 1 file changed, 1 insertion(+) diff --git a/QEfficient/transformers/models/pytorch_transforms.py b/QEfficient/transformers/models/pytorch_transforms.py index 1439738c0..60f2ee295 100644 --- a/QEfficient/transformers/models/pytorch_transforms.py +++ b/QEfficient/transformers/models/pytorch_transforms.py @@ -504,6 +504,7 @@ class SpDTransform: def apply(cls, model: nn.Module, qaic_config: Optional[dict] = None, **kwargs) -> Tuple[nn.Module, bool]: transformed = False kwargs.pop("pretrained_model_name_or_path") + if qaic_config is None or (speculative_model_type := qaic_config.get("speculative_model_type")) is None: return model, transformed elif speculative_model_type not in ( From ec60e51f7f75e7d7d43ebeaa50007d4f86e07dfd Mon Sep 17 00:00:00 2001 From: Dipankar Sarkar Date: Fri, 20 Jun 2025 09:00:59 +0000 Subject: [PATCH 3/5] Minor changes Signed-off-by: Dipankar Sarkar --- QEfficient/transformers/models/pytorch_transforms.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/QEfficient/transformers/models/pytorch_transforms.py b/QEfficient/transformers/models/pytorch_transforms.py index 60f2ee295..4c8809a9d 100644 --- a/QEfficient/transformers/models/pytorch_transforms.py +++ b/QEfficient/transformers/models/pytorch_transforms.py @@ -503,7 +503,7 @@ class SpDTransform: @classmethod def apply(cls, model: nn.Module, qaic_config: Optional[dict] = None, **kwargs) -> Tuple[nn.Module, bool]: transformed = False - kwargs.pop("pretrained_model_name_or_path") + kwargs.pop("pretrained_model_name_or_path", None) if qaic_config is None or (speculative_model_type := qaic_config.get("speculative_model_type")) is None: return model, transformed From ef0119243950a72aa1db2f40995044fbbe390ccd Mon Sep 17 00:00:00 2001 From: Dipankar Sarkar Date: Tue, 24 Jun 2025 05:09:38 +0000 Subject: [PATCH 4/5] Change in implementation Signed-off-by: Dipankar Sarkar --- QEfficient/transformers/models/modeling_auto.py | 3 +-- QEfficient/transformers/models/pytorch_transforms.py | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/QEfficient/transformers/models/modeling_auto.py b/QEfficient/transformers/models/modeling_auto.py index 6bff10f5a..b7de82292 100644 --- a/QEfficient/transformers/models/modeling_auto.py +++ b/QEfficient/transformers/models/modeling_auto.py @@ -1415,10 +1415,9 @@ def __init__( self.num_layers = model.config.num_hidden_layers self.continuous_batching = continuous_batching self.model.qaic_config = qaic_config - + self.pretrained_model_name_or_path = kwargs.pop("pretrained_model_name_or_path", None) self.model, transformed = SpDTransform.apply(self.model, qaic_config, **kwargs) self.is_tlm = transformed - self.pretrained_model_name_or_path = kwargs.get("pretrained_model_name_or_path", None) # ---Sampling--- # Note: SamplerTransform should be applied after all other transforms diff --git a/QEfficient/transformers/models/pytorch_transforms.py b/QEfficient/transformers/models/pytorch_transforms.py index 4c8809a9d..34556cec1 100644 --- a/QEfficient/transformers/models/pytorch_transforms.py +++ b/QEfficient/transformers/models/pytorch_transforms.py @@ -503,7 +503,6 @@ class SpDTransform: @classmethod def apply(cls, model: nn.Module, qaic_config: Optional[dict] = None, **kwargs) -> Tuple[nn.Module, bool]: transformed = False - kwargs.pop("pretrained_model_name_or_path", None) if qaic_config is None or (speculative_model_type := qaic_config.get("speculative_model_type")) is None: return model, transformed From e4b335bdce093c2a74be6f8efd5296d97a4312d4 Mon Sep 17 00:00:00 2001 From: Dipankar Sarkar Date: Thu, 26 Jun 2025 03:48:48 +0000 Subject: [PATCH 5/5] Minor changes 1 Signed-off-by: Dipankar Sarkar --- QEfficient/transformers/models/modeling_auto.py | 2 +- QEfficient/transformers/models/pytorch_transforms.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/QEfficient/transformers/models/modeling_auto.py b/QEfficient/transformers/models/modeling_auto.py index b7de82292..2f3ee3dc0 100644 --- a/QEfficient/transformers/models/modeling_auto.py +++ b/QEfficient/transformers/models/modeling_auto.py @@ -1415,7 +1415,7 @@ def __init__( self.num_layers = model.config.num_hidden_layers self.continuous_batching = continuous_batching self.model.qaic_config = qaic_config - self.pretrained_model_name_or_path = kwargs.pop("pretrained_model_name_or_path", None) + self.pretrained_model_name_or_path = kwargs.get("pretrained_model_name_or_path", None) self.model, transformed = SpDTransform.apply(self.model, qaic_config, **kwargs) self.is_tlm = transformed diff --git a/QEfficient/transformers/models/pytorch_transforms.py b/QEfficient/transformers/models/pytorch_transforms.py index 34556cec1..ca74c0ddd 100644 --- a/QEfficient/transformers/models/pytorch_transforms.py +++ b/QEfficient/transformers/models/pytorch_transforms.py @@ -503,7 +503,7 @@ class SpDTransform: @classmethod def apply(cls, model: nn.Module, qaic_config: Optional[dict] = None, **kwargs) -> Tuple[nn.Module, bool]: transformed = False - + pretrained_model_name_or_path_temp = kwargs.pop("pretrained_model_name_or_path", None) if qaic_config is None or (speculative_model_type := qaic_config.get("speculative_model_type")) is None: return model, transformed elif speculative_model_type not in ( @@ -525,6 +525,7 @@ def apply(cls, model: nn.Module, qaic_config: Optional[dict] = None, **kwargs) - raise NotImplementedError( f"model class {model_class} does not yet support returning multiple logits to keep." ) + kwargs["pretrained_model_name_or_path"] = pretrained_model_name_or_path_temp return model, transformed