Skip to content

LJSpeech pretrained ckpt doesn't work #47

@SAnsAN-9119

Description

@SAnsAN-9119

I am trying to run synthesis.ipynb with the pretrained checkpoint from the GDrive link, but I get:

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
Cell In[28], line 8
      5     return model
      6 count_params = lambda x: f"{sum(p.numel() for p in x.parameters()):,}"
----> 8 model = load_model(PFLOW_CHECKPOINT)
      9 print(f"Model loaded! Parameter count: {count_params(model)}")

Cell In[28], line 3, in load_model(checkpoint_path)
      1 def load_model(checkpoint_path):
      2     print(checkpoint_path)
----> 3     model = pflowTTS.load_from_checkpoint(checkpoint_path, map_location=device)
      4     model.eval()
      5     return model

File ~/anaconda3/envs/pflowtts/lib/python3.10/site-packages/lightning/pytorch/utilities/model_helpers.py:125, in _restricted_classmethod_impl.__get__.<locals>.wrapper(*args, **kwargs)
    120 if instance is not None and not is_scripting:
    121     raise TypeError(
    122         f"The classmethod `{cls.__name__}.{self.method.__name__}` cannot be called on an instance."
    123         " Please call it on the class type and make sure the return value is used."
    124     )
--> 125 return self.method(cls, *args, **kwargs)

File ~/anaconda3/envs/pflowtts/lib/python3.10/site-packages/lightning/pytorch/core/module.py:1582, in LightningModule.load_from_checkpoint(cls, checkpoint_path, map_location, hparams_file, strict, **kwargs)
   1493 @_restricted_classmethod
   1494 def load_from_checkpoint(
   1495     cls,
   (...)
   1500     **kwargs: Any,
   1501 ) -> Self:
   1502     r"""Primary way of loading a model from a checkpoint. When Lightning saves a checkpoint it stores the arguments
   1503     passed to ``__init__``  in the checkpoint under ``"hyper_parameters"``.
   1504 
   (...)
   1580 
   1581     """
-> 1582     loaded = _load_from_checkpoint(
   1583         cls,
   1584         checkpoint_path,
   1585         map_location,
   1586         hparams_file,
   1587         strict,
   1588         **kwargs,
   1589     )
   1590     return cast(Self, loaded)

File ~/anaconda3/envs/pflowtts/lib/python3.10/site-packages/lightning/pytorch/core/saving.py:91, in _load_from_checkpoint(cls, checkpoint_path, map_location, hparams_file, strict, **kwargs)
     89     return _load_state(cls, checkpoint, **kwargs)
     90 if issubclass(cls, pl.LightningModule):
---> 91     model = _load_state(cls, checkpoint, strict=strict, **kwargs)
     92     state_dict = checkpoint["state_dict"]
     93     if not state_dict:

File ~/anaconda3/envs/pflowtts/lib/python3.10/site-packages/lightning/pytorch/core/saving.py:187, in _load_state(cls, checkpoint, strict, **cls_kwargs_new)
    184     obj.on_load_checkpoint(checkpoint)
    186 # load the state_dict on the model automatically
--> 187 keys = obj.load_state_dict(checkpoint["state_dict"], strict=strict)
    189 if not strict:
    190     if keys.missing_keys:

File ~/anaconda3/envs/pflowtts/lib/python3.10/site-packages/torch/nn/modules/module.py:2215, in Module.load_state_dict(self, state_dict, strict, assign)
   2210         error_msgs.insert(
   2211             0, 'Missing key(s) in state_dict: {}. '.format(
   2212                 ', '.join(f'"{k}"' for k in missing_keys)))
   2214 if len(error_msgs) > 0:
-> 2215     raise RuntimeError('Error(s) in loading state_dict for {}:\n\t{}'.format(
   2216                        self.__class__.__name__, "\n\t".join(error_msgs)))
   2217 return _IncompatibleKeys(missing_keys, unexpected_keys)

RuntimeError: Error(s) in loading state_dict for pflowTTS:
	Unexpected key(s) in state_dict: "encoder.speech_prompt_encoder.attn_layers.0.conv_q.weight", "encoder.speech_prompt_encoder.attn_layers.0.conv_q.bias", "encoder.speech_prompt_encoder.attn_layers.0.conv_k.weight", "encoder.speech_prompt_encoder.attn_layers.0.conv_k.bias", "encoder.speech_prompt_encoder.attn_layers.0.conv_v.weight", "encoder.speech_prompt_encoder.attn_layers.0.conv_v.bias", "encoder.speech_prompt_encoder.attn_layers.0.conv_o.weight", "encoder.speech_prompt_encoder.attn_layers.0.conv_o.bias", "encoder.speech_prompt_encoder.attn_layers.1.conv_q.weight", "encoder.speech_prompt_encoder.attn_layers.1.conv_q.bias", "encoder.speech_prompt_encoder.attn_layers.1.conv_k.weight", "encoder.speech_prompt_encoder.attn_layers.1.conv_k.bias", "encoder.speech_prompt_encoder.attn_layers.1.conv_v.weight", "encoder.speech_prompt_encoder.attn_layers.1.conv_v.bias", "encoder.speech_prompt_encoder.attn_layers.1.conv_o.weight", "encoder.speech_prompt_encoder.attn_layers.1.conv_o.bias", "encoder.speech_prompt_encoder.attn_layers.2.conv_q.weight", "encoder.speech_prompt_encoder.attn_layers.2.conv_q.bias", "encoder.speech_prompt_encoder.attn_layers.2.conv_k.weight", "encoder.speech_prompt_encoder.attn_layers.2.conv_k.bias", "encoder.speech_prompt_encoder.attn_layers.2.conv_v.weight", "encoder.speech_prompt_encoder.attn_layers.2.conv_v.bias", "encoder.speech_prompt_encoder.attn_layers.2.conv_o.weight", "encoder.speech_prompt_encoder.attn_layers.2.conv_o.bias", "encoder.speech_prompt_encoder.attn_layers.3.conv_q.weight", "encoder.speech_prompt_encoder.attn_layers.3.conv_q.bias", "encoder.speech_prompt_encoder.attn_layers.3.conv_k.weight", "encoder.speech_prompt_encoder.attn_layers.3.conv_k.bias", "encoder.speech_prompt_encoder.attn_layers.3.conv_v.weight", "encoder.speech_prompt_encoder.attn_layers.3.conv_v.bias", "encoder.speech_prompt_encoder.attn_layers.3.conv_o.weight", "encoder.speech_prompt_encoder.attn_layers.3.conv_o.bias", 
"encoder.speech_prompt_encoder.attn_layers.4.conv_q.weight", "encoder.speech_prompt_encoder.attn_layers.4.conv_q.bias", "encoder.speech_prompt_encoder.attn_layers.4.conv_k.weight", "encoder.speech_prompt_encoder.attn_layers.4.conv_k.bias", "encoder.speech_prompt_encoder.attn_layers.4.conv_v.weight", "encoder.speech_prompt_encoder.attn_layers.4.conv_v.bias", "encoder.speech_prompt_encoder.attn_layers.4.conv_o.weight", "encoder.speech_prompt_encoder.attn_layers.4.conv_o.bias", "encoder.speech_prompt_encoder.attn_layers.5.conv_q.weight", "encoder.speech_prompt_encoder.attn_layers.5.conv_q.bias", "encoder.speech_prompt_encoder.attn_layers.5.conv_k.weight", "encoder.speech_prompt_encoder.attn_layers.5.conv_k.bias", "encoder.speech_prompt_encoder.attn_layers.5.conv_v.weight", "encoder.speech_prompt_encoder.attn_layers.5.conv_v.bias", "encoder.speech_prompt_encoder.attn_layers.5.conv_o.weight", "encoder.speech_prompt_encoder.attn_layers.5.conv_o.bias", "encoder.speech_prompt_encoder.norm_layers_1.0.gamma", "encoder.speech_prompt_encoder.norm_layers_1.0.beta", "encoder.speech_prompt_encoder.norm_layers_1.1.gamma", "encoder.speech_prompt_encoder.norm_layers_1.1.beta", "encoder.speech_prompt_encoder.norm_layers_1.2.gamma", "encoder.speech_prompt_encoder.norm_layers_1.2.beta", "encoder.speech_prompt_encoder.norm_layers_1.3.gamma", "encoder.speech_prompt_encoder.norm_layers_1.3.beta", "encoder.speech_prompt_encoder.norm_layers_1.4.gamma", "encoder.speech_prompt_encoder.norm_layers_1.4.beta", "encoder.speech_prompt_encoder.norm_layers_1.5.gamma", "encoder.speech_prompt_encoder.norm_layers_1.5.beta", "encoder.speech_prompt_encoder.ffn_layers.0.conv_1.weight", "encoder.speech_prompt_encoder.ffn_layers.0.conv_1.bias", "encoder.speech_prompt_encoder.ffn_layers.0.conv_2.weight", "encoder.speech_prompt_encoder.ffn_layers.0.conv_2.bias", "encoder.speech_prompt_encoder.ffn_layers.1.conv_1.weight", "encoder.speech_prompt_encoder.ffn_layers.1.conv_1.bias", 
"encoder.speech_prompt_encoder.ffn_layers.1.conv_2.weight", "encoder.speech_prompt_encoder.ffn_layers.1.conv_2.bias", "encoder.speech_prompt_encoder.ffn_layers.2.conv_1.weight", "encoder.speech_prompt_encoder.ffn_layers.2.conv_1.bias", "encoder.speech_prompt_encoder.ffn_layers.2.conv_2.weight", "encoder.speech_prompt_encoder.ffn_layers.2.conv_2.bias", "encoder.speech_prompt_encoder.ffn_layers.3.conv_1.weight", "encoder.speech_prompt_encoder.ffn_layers.3.conv_1.bias", "encoder.speech_prompt_encoder.ffn_layers.3.conv_2.weight", "encoder.speech_prompt_encoder.ffn_layers.3.conv_2.bias", "encoder.speech_prompt_encoder.ffn_layers.4.conv_1.weight", "encoder.speech_prompt_encoder.ffn_layers.4.conv_1.bias", "encoder.speech_prompt_encoder.ffn_layers.4.conv_2.weight", "encoder.speech_prompt_encoder.ffn_layers.4.conv_2.bias", "encoder.speech_prompt_encoder.ffn_layers.5.conv_1.weight", "encoder.speech_prompt_encoder.ffn_layers.5.conv_1.bias", "encoder.speech_prompt_encoder.ffn_layers.5.conv_2.weight", "encoder.speech_prompt_encoder.ffn_layers.5.conv_2.bias", "encoder.speech_prompt_encoder.norm_layers_2.0.gamma", "encoder.speech_prompt_encoder.norm_layers_2.0.beta", "encoder.speech_prompt_encoder.norm_layers_2.1.gamma", "encoder.speech_prompt_encoder.norm_layers_2.1.beta", "encoder.speech_prompt_encoder.norm_layers_2.2.gamma", "encoder.speech_prompt_encoder.norm_layers_2.2.beta", "encoder.speech_prompt_encoder.norm_layers_2.3.gamma", "encoder.speech_prompt_encoder.norm_layers_2.3.beta", "encoder.speech_prompt_encoder.norm_layers_2.4.gamma", "encoder.speech_prompt_encoder.norm_layers_2.4.beta", "encoder.speech_prompt_encoder.norm_layers_2.5.gamma", "encoder.speech_prompt_encoder.norm_layers_2.5.beta", "encoder.decoder.self_attn_layers.0.conv_q.weight", "encoder.decoder.self_attn_layers.0.conv_q.bias", "encoder.decoder.self_attn_layers.0.conv_k.weight", "encoder.decoder.self_attn_layers.0.conv_k.bias", "encoder.decoder.self_attn_layers.0.conv_v.weight", 
"encoder.decoder.self_attn_layers.0.conv_v.bias", "encoder.decoder.self_attn_layers.0.conv_o.weight", "encoder.decoder.self_attn_layers.0.conv_o.bias", "encoder.decoder.self_attn_layers.1.conv_q.weight", "encoder.decoder.self_attn_layers.1.conv_q.bias", "encoder.decoder.self_attn_layers.1.conv_k.weight", "encoder.decoder.self_attn_layers.1.conv_k.bias", "encoder.decoder.self_attn_layers.1.conv_v.weight", "encoder.decoder.self_attn_layers.1.conv_v.bias", "encoder.decoder.self_attn_layers.1.conv_o.weight", "encoder.decoder.self_attn_layers.1.conv_o.bias", "encoder.decoder.self_attn_layers.2.conv_q.weight", "encoder.decoder.self_attn_layers.2.conv_q.bias", "encoder.decoder.self_attn_layers.2.conv_k.weight", "encoder.decoder.self_attn_layers.2.conv_k.bias", "encoder.decoder.self_attn_layers.2.conv_v.weight", "encoder.decoder.self_attn_layers.2.conv_v.bias", "encoder.decoder.self_attn_layers.2.conv_o.weight", "encoder.decoder.self_attn_layers.2.conv_o.bias", "encoder.decoder.self_attn_layers.3.conv_q.weight", "encoder.decoder.self_attn_layers.3.conv_q.bias", "encoder.decoder.self_attn_layers.3.conv_k.weight", "encoder.decoder.self_attn_layers.3.conv_k.bias", "encoder.decoder.self_attn_layers.3.conv_v.weight", "encoder.decoder.self_attn_layers.3.conv_v.bias", "encoder.decoder.self_attn_layers.3.conv_o.weight", "encoder.decoder.self_attn_layers.3.conv_o.bias", "encoder.decoder.self_attn_layers.4.conv_q.weight", "encoder.decoder.self_attn_layers.4.conv_q.bias", "encoder.decoder.self_attn_layers.4.conv_k.weight", "encoder.decoder.self_attn_layers.4.conv_k.bias", "encoder.decoder.self_attn_layers.4.conv_v.weight", "encoder.decoder.self_attn_layers.4.conv_v.bias", "encoder.decoder.self_attn_layers.4.conv_o.weight", "encoder.decoder.self_attn_layers.4.conv_o.bias", "encoder.decoder.self_attn_layers.5.conv_q.weight", "encoder.decoder.self_attn_layers.5.conv_q.bias", "encoder.decoder.self_attn_layers.5.conv_k.weight", "encoder.decoder.self_attn_layers.5.conv_k.bias", 
"encoder.decoder.self_attn_layers.5.conv_v.weight", "encoder.decoder.self_attn_layers.5.conv_v.bias", "encoder.decoder.self_attn_layers.5.conv_o.weight", "encoder.decoder.self_attn_layers.5.conv_o.bias", "encoder.decoder.norm_layers_0.0.gamma", "encoder.decoder.norm_layers_0.0.beta", "encoder.decoder.norm_layers_0.1.gamma", "encoder.decoder.norm_layers_0.1.beta", "encoder.decoder.norm_layers_0.2.gamma", "encoder.decoder.norm_layers_0.2.beta", "encoder.decoder.norm_layers_0.3.gamma", "encoder.decoder.norm_layers_0.3.beta", "encoder.decoder.norm_layers_0.4.gamma", "encoder.decoder.norm_layers_0.4.beta", "encoder.decoder.norm_layers_0.5.gamma", "encoder.decoder.norm_layers_0.5.beta", "encoder.decoder.encdec_attn_layers.0.conv_q.weight", "encoder.decoder.encdec_attn_layers.0.conv_q.bias", "encoder.decoder.encdec_attn_layers.0.conv_k.weight", "encoder.decoder.encdec_attn_layers.0.conv_k.bias", "encoder.decoder.encdec_attn_layers.0.conv_v.weight", "encoder.decoder.encdec_attn_layers.0.conv_v.bias", "encoder.decoder.encdec_attn_layers.0.conv_o.weight", "encoder.decoder.encdec_attn_layers.0.conv_o.bias", "encoder.decoder.encdec_attn_layers.1.conv_q.weight", "encoder.decoder.encdec_attn_layers.1.conv_q.bias", "encoder.decoder.encdec_attn_layers.1.conv_k.weight", "encoder.decoder.encdec_attn_layers.1.conv_k.bias", "encoder.decoder.encdec_attn_layers.1.conv_v.weight", "encoder.decoder.encdec_attn_layers.1.conv_v.bias", "encoder.decoder.encdec_attn_layers.1.conv_o.weight", "encoder.decoder.encdec_attn_layers.1.conv_o.bias", "encoder.decoder.encdec_attn_layers.2.conv_q.weight", "encoder.decoder.encdec_attn_layers.2.conv_q.bias", "encoder.decoder.encdec_attn_layers.2.conv_k.weight", "encoder.decoder.encdec_attn_layers.2.conv_k.bias", "encoder.decoder.encdec_attn_layers.2.conv_v.weight", "encoder.decoder.encdec_attn_layers.2.conv_v.bias", "encoder.decoder.encdec_attn_layers.2.conv_o.weight", "encoder.decoder.encdec_attn_layers.2.conv_o.bias", 
"encoder.decoder.encdec_attn_layers.3.conv_q.weight", "encoder.decoder.encdec_attn_layers.3.conv_q.bias", "encoder.decoder.encdec_attn_layers.3.conv_k.weight", "encoder.decoder.encdec_attn_layers.3.conv_k.bias", "encoder.decoder.encdec_attn_layers.3.conv_v.weight", "encoder.decoder.encdec_attn_layers.3.conv_v.bias", "encoder.decoder.encdec_attn_layers.3.conv_o.weight", "encoder.decoder.encdec_attn_layers.3.conv_o.bias", "encoder.decoder.encdec_attn_layers.4.conv_q.weight", "encoder.decoder.encdec_attn_layers.4.conv_q.bias", "encoder.decoder.encdec_attn_layers.4.conv_k.weight", "encoder.decoder.encdec_attn_layers.4.conv_k.bias", "encoder.decoder.encdec_attn_layers.4.conv_v.weight", "encoder.decoder.encdec_attn_layers.4.conv_v.bias", "encoder.decoder.encdec_attn_layers.4.conv_o.weight", "encoder.decoder.encdec_attn_layers.4.conv_o.bias", "encoder.decoder.encdec_attn_layers.5.conv_q.weight", "encoder.decoder.encdec_attn_layers.5.conv_q.bias", "encoder.decoder.encdec_attn_layers.5.conv_k.weight", "encoder.decoder.encdec_attn_layers.5.conv_k.bias", "encoder.decoder.encdec_attn_layers.5.conv_v.weight", "encoder.decoder.encdec_attn_layers.5.conv_v.bias", "encoder.decoder.encdec_attn_layers.5.conv_o.weight", "encoder.decoder.encdec_attn_layers.5.conv_o.bias", "encoder.decoder.norm_layers_1.0.gamma", "encoder.decoder.norm_layers_1.0.beta", "encoder.decoder.norm_layers_1.1.gamma", "encoder.decoder.norm_layers_1.1.beta", "encoder.decoder.norm_layers_1.2.gamma", "encoder.decoder.norm_layers_1.2.beta", "encoder.decoder.norm_layers_1.3.gamma", "encoder.decoder.norm_layers_1.3.beta", "encoder.decoder.norm_layers_1.4.gamma", "encoder.decoder.norm_layers_1.4.beta", "encoder.decoder.norm_layers_1.5.gamma", "encoder.decoder.norm_layers_1.5.beta", "encoder.decoder.ffn_layers.0.conv_1.weight", "encoder.decoder.ffn_layers.0.conv_1.bias", "encoder.decoder.ffn_layers.0.conv_2.weight", "encoder.decoder.ffn_layers.0.conv_2.bias", "encoder.decoder.ffn_layers.1.conv_1.weight", 
"encoder.decoder.ffn_layers.1.conv_1.bias", "encoder.decoder.ffn_layers.1.conv_2.weight", "encoder.decoder.ffn_layers.1.conv_2.bias", "encoder.decoder.ffn_layers.2.conv_1.weight", "encoder.decoder.ffn_layers.2.conv_1.bias", "encoder.decoder.ffn_layers.2.conv_2.weight", "encoder.decoder.ffn_layers.2.conv_2.bias", "encoder.decoder.ffn_layers.3.conv_1.weight", "encoder.decoder.ffn_layers.3.conv_1.bias", "encoder.decoder.ffn_layers.3.conv_2.weight", "encoder.decoder.ffn_layers.3.conv_2.bias", "encoder.decoder.ffn_layers.4.conv_1.weight", "encoder.decoder.ffn_layers.4.conv_1.bias", "encoder.decoder.ffn_layers.4.conv_2.weight", "encoder.decoder.ffn_layers.4.conv_2.bias", "encoder.decoder.ffn_layers.5.conv_1.weight", "encoder.decoder.ffn_layers.5.conv_1.bias", "encoder.decoder.ffn_layers.5.conv_2.weight", "encoder.decoder.ffn_layers.5.conv_2.bias", "encoder.decoder.norm_layers_2.0.gamma", "encoder.decoder.norm_layers_2.0.beta", "encoder.decoder.norm_layers_2.1.gamma", "encoder.decoder.norm_layers_2.1.beta", "encoder.decoder.norm_layers_2.2.gamma", "encoder.decoder.norm_layers_2.2.beta", "encoder.decoder.norm_layers_2.3.gamma", "encoder.decoder.norm_layers_2.3.beta", "encoder.decoder.norm_layers_2.4.gamma", "encoder.decoder.norm_layers_2.4.beta", "encoder.decoder.norm_layers_2.5.gamma", "encoder.decoder.norm_layers_2.5.beta".

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions