Skip to content

Commit c1e26c1

Browse files
committed
feat: add encoder_fairseq_dir path to fine-tuning and inference scripts
1 parent bd5e5aa commit c1e26c1

File tree

7 files changed: 21 additions (+21) and 0 deletions (-0).

7 files changed: 21 additions (+21) and 0 deletions (-0).

examples/slam_aac/scripts/finetune_audiocaps.sh

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,8 @@ run_dir=/data/wenxi.chen/SLAM-LLM
99
cd $run_dir
1010
code_dir=examples/slam_aac
1111

12+
encoder_fairseq_dir=/fairseq/EAT # path to the fairseq directory of the encoder model
13+
1214
audio_encoder_path=/data/xiquan.li/models/EAT-base_epoch30_ft.pt
1315
llm_path=/data/xiquan.li/models/vicuna-7b-v1.5
1416

@@ -38,6 +40,7 @@ hydra.run.dir=$output_dir \
3840
++model_config.encoder_path=$audio_encoder_path \
3941
++model_config.encoder_dim=768 \
4042
++model_config.encoder_projector=linear \
43+
++model_config.encoder_fairseq_dir=$encoder_fairseq_dir \
4144
++dataset_config.encoder_projector_ds_rate=${encoder_projector_ds_rate} \
4245
++dataset_config.dataset=audio_dataset \
4346
++dataset_config.train_data_path=$train_jsonl_path \

examples/slam_aac/scripts/finetune_clotho.sh

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,8 @@ run_dir=/data/wenxi.chen/SLAM-LLM
99
cd $run_dir
1010
code_dir=examples/slam_aac
1111

12+
encoder_fairseq_dir=/fairseq/EAT # path to the fairseq directory of the encoder model
13+
1214
audio_encoder_path=/data/xiquan.li/models/EAT-base_epoch30_ft.pt
1315
llm_path=/data/xiquan.li/models/vicuna-7b-v1.5
1416

@@ -38,6 +40,7 @@ hydra.run.dir=$output_dir \
3840
++model_config.encoder_path=$audio_encoder_path \
3941
++model_config.encoder_dim=768 \
4042
++model_config.encoder_projector=linear \
43+
++model_config.encoder_fairseq_dir=$encoder_fairseq_dir \
4144
++dataset_config.encoder_projector_ds_rate=${encoder_projector_ds_rate} \
4245
++dataset_config.dataset=audio_dataset \
4346
++dataset_config.train_data_path=$train_jsonl_path \

examples/slam_aac/scripts/inference_audiocaps_CLAP_Refine.sh

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,8 @@ audio_encoder_path=/data/xiquan.li/models/EAT-base_epoch30_ft.pt
1010
llm_path=/data/xiquan.li/models/vicuna-7b-v1.5
1111
clap_dir=/data/xiquan.li/models/clap
1212

13+
encoder_fairseq_dir=/fairseq/EAT # path to the fairseq directory of the encoder model
14+
1315
encoder_projector_ds_rate=5
1416

1517
inference_data_path=/data/wenxi.chen/data/audiocaps/new_test.jsonl
@@ -41,6 +43,7 @@ for num_beams in "${beam_range[@]}"; do
4143
++model_config.encoder_projector=linear \
4244
++model_config.encoder_projector_ds_rate=$encoder_projector_ds_rate \
4345
++model_config.normalize=true \
46+
++model_config.encoder_fairseq_dir=$encoder_fairseq_dir \
4447
++dataset_config.encoder_projector_ds_rate=$encoder_projector_ds_rate \
4548
++dataset_config.dataset=audio_dataset \
4649
++dataset_config.val_data_path=$inference_data_path \

examples/slam_aac/scripts/inference_audiocaps_bs.sh

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,8 @@ run_dir=/data/wenxi.chen/SLAM-LLM
66
cd $run_dir
77
code_dir=examples/slam_aac
88

9+
encoder_fairseq_dir=/fairseq/EAT # path to the fairseq directory of the encoder model
10+
911
audio_encoder_path=/data/xiquan.li/models/EAT-base_epoch30_ft.pt
1012
llm_path=/data/xiquan.li/models/vicuna-7b-v1.5
1113

@@ -31,6 +33,7 @@ python $code_dir/inference_aac_batch.py \
3133
++model_config.encoder_projector=linear \
3234
++model_config.encoder_projector_ds_rate=$encoder_projector_ds_rate \
3335
++model_config.normalize=true \
36+
++model_config.encoder_fairseq_dir=$encoder_fairseq_dir \
3437
++dataset_config.encoder_projector_ds_rate=$encoder_projector_ds_rate \
3538
++dataset_config.dataset=audio_dataset \
3639
++dataset_config.val_data_path=$inference_data_path \

examples/slam_aac/scripts/inference_clotho_CLAP_Refine.sh

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,8 @@ run_dir=/data/wenxi.chen/SLAM-LLM
66
cd $run_dir
77
code_dir=examples/slam_aac
88

9+
encoder_fairseq_dir=/fairseq/EAT # path to the fairseq directory of the encoder model
10+
911
audio_encoder_path=/data/xiquan.li/models/EAT-base_epoch30_ft.pt
1012
llm_path=/data/xiquan.li/models/vicuna-7b-v1.5
1113
clap_dir=/data/xiquan.li/models/clap
@@ -41,6 +43,7 @@ for num_beams in "${beam_range[@]}"; do
4143
++model_config.encoder_projector=linear \
4244
++model_config.encoder_projector_ds_rate=$encoder_projector_ds_rate \
4345
++model_config.normalize=true \
46+
++model_config.encoder_fairseq_dir=$encoder_fairseq_dir \
4447
++dataset_config.encoder_projector_ds_rate=$encoder_projector_ds_rate \
4548
++dataset_config.dataset=audio_dataset \
4649
++dataset_config.val_data_path=$inference_data_path \

examples/slam_aac/scripts/inference_clotho_bs.sh

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,8 @@ run_dir=/data/wenxi.chen/SLAM-LLM
66
cd $run_dir
77
code_dir=examples/slam_aac
88

9+
encoder_fairseq_dir=/fairseq/EAT # path to the fairseq directory of the encoder model
10+
911
audio_encoder_path=/data/xiquan.li/models/EAT-base_epoch30_ft.pt
1012
llm_path=/data/xiquan.li/models/vicuna-7b-v1.5
1113

@@ -31,6 +33,7 @@ python $code_dir/inference_aac_batch.py \
3133
++model_config.encoder_projector=linear \
3234
++model_config.encoder_projector_ds_rate=$encoder_projector_ds_rate \
3335
++model_config.normalize=true \
36+
++model_config.encoder_fairseq_dir=$encoder_fairseq_dir \
3437
++dataset_config.encoder_projector_ds_rate=$encoder_projector_ds_rate \
3538
++dataset_config.dataset=audio_dataset \
3639
++dataset_config.val_data_path=$inference_data_path \

examples/slam_aac/scripts/pretrain.sh

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,8 @@ run_dir=/data/wenxi.chen/SLAM-LLM
99
cd $run_dir
1010
code_dir=examples/slam_aac
1111

12+
encoder_fairseq_dir=/fairseq/EAT # path to the fairseq directory of the encoder model
13+
1214
audio_encoder_path=/data/xiquan.li/models/EAT-base_epoch30_ft.pt
1315
llm_path=/data/xiquan.li/models/vicuna-7b-v1.5
1416

@@ -34,6 +36,7 @@ hydra.run.dir=$output_dir \
3436
++model_config.encoder_path=$audio_encoder_path \
3537
++model_config.encoder_dim=768 \
3638
++model_config.encoder_projector=linear \
39+
++model_config.encoder_fairseq_dir=$encoder_fairseq_dir \
3740
++dataset_config.encoder_projector_ds_rate=${encoder_projector_ds_rate} \
3841
++dataset_config.dataset=audio_dataset \
3942
++dataset_config.train_data_path=$train_jsonl_path \

0 commit comments

Comments
 (0)