
Commit c16e3a9

fix model name, README and fsdp config for examples/sec_emotioncaps
1 parent: f37fe73

3 files changed: +7 -17 lines

examples/sec_emotioncaps/README.md

Lines changed: 0 additions & 13 deletions
@@ -40,16 +40,3 @@ If you do have sufficient relevant data, you can train the model yourself.
 ```
 bash finetune_emotion2vec_qformer_vicuna_7b.sh
 ```
-
-## Citation
-
-You can refer to the paper for more results.
-
-```
-@article{ma2024embarrassingly,
-  title={An Embarrassingly Simple Approach for LLM with Strong ASR Capacity},
-  author={Ma, Ziyang and Yang, Guanrou and Yang, Yifan and Gao, Zhifu and Wang, Jiaming and Du, Zhihao and Yu, Fan and Chen, Qian and Zheng, Siqi and Zhang, Shiliang and others},
-  journal={arXiv preprint arXiv:2402.08846},
-  year={2024}
-}
-```

examples/sec_emotioncaps/model/slam_model_sec.py

Lines changed: 2 additions & 2 deletions
@@ -25,7 +25,7 @@ def model_factory(train_config, model_config, **kwargs):
     encoder_projector = setup_encoder_projector(
         train_config, model_config, **kwargs
     )
-    model = slam_model_asr(
+    model = slam_model_sec(
         encoder,
         llm,
         encoder_projector,
@@ -55,7 +55,7 @@ def model_factory(train_config, model_config, **kwargs):
     return model, tokenizer
 
 
-class slam_model_asr(slam_model):
+class slam_model_sec(slam_model):
     def __init__(
         self,
         encoder,
examples/sec_emotioncaps/sec_config.py

Lines changed: 5 additions & 2 deletions
@@ -1,5 +1,8 @@
 from dataclasses import dataclass, field
 from typing import Optional, List
+
+from torch.distributed.fsdp import ShardingStrategy
+
 @dataclass
 class ModelConfig:
     file: str = "examples/sec_emotioncaps/model/slam_model_sec.py:model_factory"
@@ -108,8 +111,8 @@ class DataConfig:
 class FSDPConfig:
     mixed_precision: bool = True
     use_fp16: bool = False
-    # sharding_strategy: str = "FULL_SHARD" #ShardingStrategy = ShardingStrategy.FULL_SHARD
-    sharding_strategy: str = "NO_SHARD" #MZY: set NO_SHARD when use DDP
+    # sharding_strategy = "FULL_SHARD" #ShardingStrategy = ShardingStrategy.FULL_SHARD
+    sharding_strategy: ShardingStrategy = "NO_SHARD" #ShardingStrategy.NO_SHARD #MZY: set NO_SHARD when use DDP
     checkpoint_type: str = "SHARDED_STATE_DICT" # alternatively can use SHARDED_STATE_DICT save one file per rank, and can resize the world-size.
     fsdp_activation_checkpointing: bool = True
     fsdp_cpu_offload: bool = False
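The config change imports torch's ShardingStrategy enum and retypes the sharding_strategy field with it (the assigned value in the diff is still the string "NO_SHARD"). For context, a hedged sketch of how such a field is typically consumed when a model is wrapped with FSDP; the FSDPConfig and wrap_model below are illustrative stand-ins, not the example's actual training code:

```python
# Sketch only: requires an initialized torch.distributed process group to run.
from dataclasses import dataclass

import torch.nn as nn
from torch.distributed.fsdp import FullyShardedDataParallel as FSDP
from torch.distributed.fsdp import ShardingStrategy


@dataclass
class FSDPConfig:
    # NO_SHARD keeps full parameters on every rank, so FSDP behaves like DDP,
    # which is what the "set NO_SHARD when use DDP" comment in the diff refers to.
    sharding_strategy: ShardingStrategy = ShardingStrategy.NO_SHARD


def wrap_model(model: nn.Module, cfg: FSDPConfig) -> FSDP:
    # FSDP's sharding_strategy parameter expects a ShardingStrategy enum member
    # rather than the string "NO_SHARD"; importing the enum in sec_config.py
    # makes that value available where the config is defined.
    return FSDP(model, sharding_strategy=cfg.sharding_strategy)
```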
