|
#!/bin/bash
# Batch-decode LibriSpeech test_clean/test_other with a WavLM-Large (char-level
# libri960 fine-tune) speech encoder + Vicuna-7B contextual-ASR checkpoint,
# then apply Whisper-style text normalization, compute WER, and run the
# is21_deep_bias biasing scorer.
#
# All model/data paths are hard-coded NFS locations below.
# Override the GPU by exporting CUDA_VISIBLE_DEVICES before invoking.

# Fail fast: abort on any command failure, unset variable, or pipeline error,
# instead of the old `&& \` chains that silently skipped failed splits.
set -euo pipefail

#export PYTHONPATH=/root/whisper:$PYTHONPATH
# Default to GPU 2 but respect a caller-provided value.
export CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES:-2}
# Avoid tokenizer fork warnings/deadlocks in dataloader workers.
export TOKENIZERS_PARALLELISM=false
# export CUDA_LAUNCH_BLOCKING=1

run_dir=/nfs/yangguanrou.ygr/codes/SLAM-LLM
cd "$run_dir"
code_dir=examples/contextual_asr

speech_encoder_path=/nfs/yangguanrou.ygr/ckpts/wavlm_large_ft_libri960_char/wavlm_large_ft_libri960_char.pt
llm_path=/nfs/maziyang.mzy/models/vicuna-7b-v1.5

output_dir=/nfs/yangguanrou.ygr/experiments_librispeech/vicuna-7b-v1.5-WavLM-Large-libri960-ft-char-20240521
ckpt_path=$output_dir/asr_epoch_3_step_9780
# Biasing-list size; must match the ref_score/*.biasing_${N}.tsv files.
N=100

for ref_split in test_clean test_other; do
  split=librispeech_${ref_split}
  val_data_path=/nfs/maziyang.mzy/data/librispeech/${split}.jsonl
  decode_log=$ckpt_path/decode_${split}_beam4_debug

  # Run batched inference; produces ${decode_log}_gt and ${decode_log}_pred.
  python "$code_dir/inference_contextual_asr_batch.py" \
    --config-path "conf" \
    --config-name "prompt.yaml" \
    hydra.run.dir="$ckpt_path" \
    ++model_config.llm_name="vicuna-7b-v1.5" \
    ++model_config.llm_path="$llm_path" \
    ++model_config.llm_dim=4096 \
    ++model_config.encoder_name=wavlm \
    ++model_config.normalize=true \
    ++dataset_config.normalize=true \
    ++model_config.encoder_projector_ds_rate=5 \
    ++model_config.encoder_path="$speech_encoder_path" \
    ++model_config.encoder_dim=1024 \
    ++model_config.encoder_projector=cov1d-linear \
    ++dataset_config.dataset=speech_dataset \
    ++dataset_config.val_data_path="$val_data_path" \
    ++dataset_config.input_type=raw \
    ++dataset_config.inference_mode=true \
    ++train_config.model_name=asr \
    ++train_config.freeze_encoder=true \
    ++train_config.freeze_llm=true \
    ++train_config.batching_strategy=custom \
    ++train_config.num_epochs=1 \
    ++train_config.val_batch_size=1 \
    ++train_config.num_workers_dataloader=0 \
    ++train_config.output_dir="$output_dir" \
    ++decode_log="$decode_log" \
    ++ckpt_path="$ckpt_path/model.pt"

  # Whisper-style text normalization of references and hypotheses.
  python src/slam_llm/utils/whisper_tn.py "${decode_log}_gt" "${decode_log}_gt.proc"
  python src/slam_llm/utils/whisper_tn.py "${decode_log}_pred" "${decode_log}_pred.proc"

  # Overall WER on the normalized transcripts.
  python src/slam_llm/utils/compute_wer.py \
    "${decode_log}_gt.proc" "${decode_log}_pred.proc" "${decode_log}.proc.wer"

  # Biasing-aware scoring against the is21_deep_bias reference lists.
  # NOTE(review): --output_file reuses ${decode_log}.proc.wer and therefore
  # overwrites the compute_wer.py result above — confirm whether a distinct
  # output name (e.g. ${decode_log}.proc.biasing_wer) was intended.
  python /nfs/yangguanrou.ygr/data/fbai-speech/is21_deep_bias/my_score.py \
    --refs "/nfs/yangguanrou.ygr/data/fbai-speech/is21_deep_bias/ref_score/${ref_split}.biasing_${N}.tsv" \
    --hyps "${decode_log}_pred.proc" \
    --output_file "${decode_log}.proc.wer"
done
0 commit comments