From 2cc004b3b6b793c8cc84ba09e3423e4d09d0250a Mon Sep 17 00:00:00 2001 From: cwx-worst-one <1029713857@qq.com> Date: Sun, 9 Feb 2025 12:57:18 +0000 Subject: [PATCH 1/2] fix: update support for jsonl format --- examples/s2s/README.md | 5 ++++- .../s2s/demo/demo_data/jsonl_demo-en.jsonl | 10 +++++++++ .../s2s/demo/demo_data/jsonl_demo-zh.jsonl | 10 +++++++++ examples/s2s/demo/demo_data/jsonl_demo.jsonl | 6 ------ examples/s2s/s2s_config.py | 2 +- examples/s2s/scripts/finetune/finetune_s2s.sh | 5 +++-- .../scripts/finetune/finetune_s2s_group.sh | 5 +++-- .../finetune/mini-omni/finetune_s2s.sh | 3 ++- .../scripts/inference/inference_s2s_batch.sh | 2 +- .../mini-omni/inference_s2s_batch.sh | 2 +- .../mini-omni/inference_s2s_stream.sh | 2 +- .../inference/mini-omni/inference_tts.sh | 2 +- examples/s2s/scripts/pretrain/pretrain_asr.sh | 3 ++- .../scripts/pretrain/pretrain_asr_debug.sh | 3 ++- examples/s2s/scripts/pretrain/pretrain_tts.sh | 3 ++- .../scripts/pretrain/pretrain_tts_debug.sh | 3 ++- examples/s2s/speech_dataset_s2s.py | 21 ++++++++++--------- 17 files changed, 56 insertions(+), 31 deletions(-) create mode 100644 examples/s2s/demo/demo_data/jsonl_demo-en.jsonl create mode 100644 examples/s2s/demo/demo_data/jsonl_demo-zh.jsonl delete mode 100644 examples/s2s/demo/demo_data/jsonl_demo.jsonl diff --git a/examples/s2s/README.md b/examples/s2s/README.md index 6a228f42..7e655f21 100644 --- a/examples/s2s/README.md +++ b/examples/s2s/README.md @@ -41,9 +41,12 @@ ds = load_dataset("DATASET_NAME") ### JSONL We also support JSONL format for its concise structure. Below is an example: ```jsonl -{"key": "1", "source_wav": "/xxx/1.wav", "source_text": "Can you recommend some Chinese food for me?", "target_wav": "/xxx/1.wav", "target_text": "Sure! I recommend trying dumplings, Peking duck, and mapo tofu for a mix of flavors and textures in Chinese cuisine. These dishes offer a good balance of savory, spicy, and crispy elements."} +{"key": "1", "source_wav": "/xxx/1.wav", "source_text": "Can you recommend some Chinese food for me?", "target_token": [742, 383, 455, ..., 180, 619], "target_text": "Sure! I recommend trying dumplings, Peking duck, and mapo tofu for a mix of flavors and textures in Chinese cuisine. These dishes offer a good balance of savory, spicy, and crispy elements."} ``` +🔔**Update**: +We now use `target_token` to replace the `target_wav` field. When using your own data, you need to generate the corresponding audio response tokens yourself (e.g., using [CosyVoice](https://github.com/FunAudioLLM/CosyVoice) tokens in SLAM-Omni). + ## Checkpoints We reproduced the single-stage fine-tuning results of SLAM-Omni with a group size of **3**. The following checkpoints are available for download: - [Single-Round Dialogue (English)](https://drive.google.com/drive/folders/1ZmM1h5ZTvS-piuN-msmctmZdi51GWLAu?usp=sharing): Trained on VoiceAssistant-400K. diff --git a/examples/s2s/demo/demo_data/jsonl_demo-en.jsonl b/examples/s2s/demo/demo_data/jsonl_demo-en.jsonl new file mode 100644 index 00000000..a977c5ed --- /dev/null +++ b/examples/s2s/demo/demo_data/jsonl_demo-en.jsonl @@ -0,0 +1,10 @@ +{"split_name": "identity", "index": 1, "round": 1, "target_token": [745, 1479, 3132, 3132, 160, 160, 1531, 218, 4030, 612, 612, 603, 72, 570, 714, 193, 231, 193, 193, 123, 1065, 116, 292, 72, 218, 79, 1206, 409, 609, 501, 525, 186, 186, 186, 181, 285, 285, 411, 411, 411, 117, 155, 155, 131, 541, 287, 632, 206, 135, 102, 357, 231, 518, 292, 892, 628, 28, 2592, 263, 528, 118, 118, 563, 358, 425, 3966, 245, 228, 99, 99, 1397, 493, 38, 219, 621, 132, 155, 131, 700, 18, 742, 167, 479, 135, 116, 2592, 2280, 600, 193, 231, 4, 490, 1065, 502, 73, 383, 473, 175, 323, 130, 824, 3108, 467, 181, 550, 312, 620, 261, 16, 2503, 493, 467, 704, 131, 155, 155, 224, 213, 889, 163, 415, 329, 91, 413, 372, 181, 550, 186, 285, 285, 97, 433, 144, 360, 5, 5, 214, 484, 138, 138, 138, 138, 523, 376, 523, 523, 523, 523, 523, 593, 417, 119, 443, 646, 230, 230, 440, 440, 440, 440, 440, 440, 440, 230, 230, 491, 491, 646, 545, 583, 280, 1357, 522, 522, 231, 518, 292, 569, 612, 674, 168, 1065, 153, 183, 452, 11, 674, 1529, 518, 231, 231, 96, 240, 676, 362, 32, 467, 596, 706, 287, 287, 316, 511, 133, 1354, 343, 343, 343, 30, 624, 411, 212, 506, 42, 733, 212, 650, 1091, 317, 132, 512, 512, 186, 444, 186, 186, 285, 1593, 675, 639, 383, 183, 153, 727, 727, 565, 515, 226, 173, 1206, 411, 362, 362, 489, 461, 79, 577, 388, 253, 633, 650, 558, 117, 316, 287, 591, 1354, 1354, 422, 400, 137, 719, 4, 1935, 2302, 2992, 618, 506, 155, 131, 237, 632, 683, 419, 102, 357, 231, 518, 292, 206, 628, 28, 263, 263, 149, 889, 889, 314, 337, 131, 131, 640, 36, 632, 1938, 1923, 479, 399, 85, 1988, 232, 122, 122, 374, 57, 332, 516, 9, 9, 518, 193, 522, 193, 522, 522, 3006, 3006, 357, 361, 481, 85, 204, 673, 57, 99, 99, 742, 54, 824, 309, 67, 734, 228, 177, 745, 187, 190, 1206, 409, 362, 101, 358, 425, 742, 190, 2503, 3290, 3290, 16, 62, 261, 568, 579, 579, 390, 579, 3276, 364, 9, 9, 180, 180, 85, 710, 192, 1261, 600, 322, 193, 694, 193, 193, 231, 231, 522, 357, 490, 149, 710, 221, 279, 508, 440, 440, 440, 230, 230, 440, 440, 440, 491, 230, 646, 508, 646, 545, 545, 396, 396, 467, 3108, 656, 550, 333, 568, 261, 3480, 188, 476, 298, 317, 143, 155, 155, 337, 581, 258, 737, 554, 174, 76, 76, 48, 571, 181, 186, 186, 186, 285, 595, 2712, 2592, 5, 448, 484, 406, 551, 310, 253, 253, 342, 5, 5, 214, 427, 519, 2503, 232, 682, 481, 117, 312, 550, 550, 550, 444, 737, 554, 231, 193, 1852, 518, 570, 258, 258, 528, 741, 18, 75, 433, 433, 569, 2644, 471, 64, 3, 42, 124, 425, 706, 602, 354, 362, 1206, 528, 623, 164, 623, 441, 513, 400, 172, 370, 79, 430, 212, 744, 189, 823, 597, 141, 141, 11, 4030, 219, 88, 253, 1988, 153, 183, 515, 452, 345, 256, 1531, 232, 122, 180, 215, 332, 57, 107, 2297, 84, 84, 623, 260, 289, 396, 375, 425, 1073, 4022, 550, 186, 181, 444, 444, 3539, 1593, 2645, 374, 116, 340, 1531, 25, 2723, 2723, 2723, 199, 199, 388, 43, 119, 1065, 646, 230, 230, 230, 230, 230, 508, 230, 230, 230, 230, 53, 491, 2530, 545, 710, 221, 185, 362, 362, 173, 2057, 596, 211, 363, 391, 557, 1354, 47, 47, 281, 240, 597, 4, 522, 4, 194, 719, 315, 363, 2992, 1065, 506, 515, 591, 50, 728, 344, 344, 539, 601, 612, 603, 673, 518, 522, 193, 3006, 3006, 1935, 361, 481, 85, 184, 70, 228, 2782, 2057, 493, 38, 1923, 585, 690, 362, 467, 119, 704, 287, 143, 316, 591, 362, 323, 323, 323, 343, 3950, 498, 337, 356, 10, 3836, 714, 337, 389, 2185, 1006, 1593, 453, 18, 2712, 665, 194, 231, 231, 518, 238, 441, 2386, 396, 317, 628, 1938, 72, 528, 528, 163, 163, 528, 441, 260, 479, 187, 479, 25, 2723, 2723, 2723, 2723, 269, 269, 199, 718, 375, 482, 1479, 710, 658, 658, 646, 230, 230, 508, 230, 230, 230, 230, 230, 646, 491, 658, 545, 373, 658, 710, 334, 95, 630, 196, 179, 702, 124, 211, 2031, 225, 214, 197, 676, 38, 493, 374, 297, 1460, 444, 444, 97, 737, 627, 231, 3006, 1852, 12, 518, 570, 258, 387, 609, 489, 215, 79, 2579, 65, 467, 425, 706, 143, 316, 287, 591, 1206, 343, 343, 323, 498, 29, 541, 455, 4, 522, 4, 597, 226, 742, 198, 1206, 110, 28, 2592, 733, 5, 427, 407, 135, 623, 548, 528, 441, 260, 479, 286, 179, 400, 313, 699, 3965, 565, 515, 618, 546, 256, 86, 593, 232, 122, 122, 1037, 481, 204, 70, 184, 57, 38, 253, 702, 141, 141, 488, 653, 601, 2644, 471, 656, 119, 42, 1091, 425, 132, 389, 92, 441, 361, 337, 316, 117, 377, 3132, 563, 289, 167, 286, 39, 39, 686, 83, 707, 342, 1405, 467, 467, 190, 2844, 507, 3480, 3480, 603, 44, 476, 188, 88, 270, 596, 450, 565, 506, 287, 185, 467, 65, 537, 2844, 57, 593, 102, 180, 1037, 373, 334, 342, 375, 823, 363, 688, 216, 637, 1479, 677, 669, 545, 508, 440, 230, 230, 491, 230, 230, 491, 1651, 230, 230, 230, 440, 463, 463, 463, 545, 395, 1091, 358, 132, 1073, 1593, 1593, 539, 286, 179, 702, 2592, 220, 363, 383, 210, 427, 690, 1930, 281, 44, 3406, 476, 2989, 3605, 87, 87, 633, 686, 630, 755, 1923, 270, 342, 823, 363, 452, 141, 238, 419, 346, 503, 96, 72, 289, 130, 358, 132, 2185, 181, 186, 285, 285, 285, 285, 1593, 2645, 374, 187, 1938, 196, 39, 157, 388, 79, 65, 824, 3108, 467, 181, 333, 568, 261, 261, 507, 351, 493, 482, 143, 131, 3836, 704, 224, 213, 118, 163, 415, 576, 1479, 1381, 64, 181, 186, 285, 285, 285, 578, 339, 366, 5, 140, 484, 308, 475, 388, 376, 253, 253, 596, 733, 5, 214, 427, 519, 257, 232, 682, 394, 312, 550, 550, 444, 444, 737, 737, 3966, 193, 231, 231, 518, 570, 258, 2437, 595, 433, 75, 75, 433, 170, 2644, 471, 2185, 119, 42, 358, 556, 302, 399, 755, 205, 824, 122, 596, 168, 1531, 683, 110, 116, 127, 594, 683, 259, 499, 629, 681, 153, 183, 506, 287, 241, 354, 277, 63, 118, 590, 590, 590, 163, 548, 412, 3006, 1935, 96, 528, 394, 668, 426, 297, 218, 2057, 596, 823, 823, 256, 219, 11, 476, 298, 1988, 584, 363, 238, 1529, 34, 131, 131, 131, 3836, 511, 284, 4022, 1460, 333, 312, 2828, 739, 338, 2503, 86, 630, 630, 204, 204, 70, 2503, 38, 38, 1938, 574, 89, 175, 596, 565, 506, 287, 591, 1206, 190, 344, 158, 569, 892, 96, 662, 193, 3006, 193, 357, 361, 481, 204, 70, 70, 7, 2782, 676, 44, 493, 476, 104, 104, 104, 551, 83, 630, 630, 2752, 2503, 101, 3108, 824, 425, 706, 574, 177, 3290, 338, 3823, 568, 579, 579, 579, 946, 946, 613, 253, 119, 382, 322, 193, 518, 292, 102, 135, 135, 135, 346, 429, 1935, 357, 361, 609, 501, 129, 129, 129, 80, 1206, 1206, 1228, 596, 391, 594, 135, 168, 231, 231, 12, 504, 1065, 733, 452, 141, 217, 352, 285, 298, 298, 1059, 309, 704, 10, 155, 287, 117, 411, 198, 409, 409, 110, 34, 501, 3821, 1091, 584, 363, 455, 637, 377, 669, 545, 279, 440, 230, 230, 230, 440, 440, 230, 230, 230, 230, 230, 230, 230, 508, 2530, 463, 1019, 395, 607, 3132, 536, 30, 513, 557, 1519, 179, 187, 317, 2989, 3605, 551, 87, 298, 87, 6, 823, 363, 210, 473, 511, 325, 74, 624, 206, 4030, 519, 141, 488, 488, 433, 352, 2644, 333, 889, 737, 656, 337, 10, 544, 661, 345, 629, 1519, 3466, 1354, 173, 411, 411, 409, 2330, 489, 184, 70, 645, 4030, 114, 493, 54, 3108, 824, 656, 390, 1596, 181, 17, 80, 606, 409, 409, 117, 668, 186, 471, 118, 548, 35, 656, 889, 333, 333, 3276, 261, 261, 16, 577, 585, 493, 493, 596, 2057, 456, 3821, 42, 706, 1073, 4022, 1460, 333, 305, 4022, 4022, 1593, 355, 374, 187, 479, 25, 39, 2723, 2723, 468, 269, 3132, 388, 376, 695, 214, 583, 545, 545, 692, 508, 646, 508, 230, 230, 440, 230, 230], "source_text": ": Are there any particular physical benefits to mindful walking, such as improved posture or increased physical fitness?", "source_wav": "/home/v-wenxichen/data/debug/slam-omni_jsonl-en/audio/audio_0.wav", "target_text": "Yes, there are physical benefits to mindful walking, such as improved posture, increased physical fitness, and better balance. Mindful walking can also help relieve tension in the body, reduce stress, and improve flexibility. It can also improve circulation and help with weight management. By tuning into the body, mindful walking can also help individuals identify and address any imbalances or discomfort, leading to a healthier and more aligned body."} +{"split_name": "identity", "index": 2, "round": 1, "target_token": [2828, 587, 658, 1423, 426, 1593, 569, 556, 396, 425, 67, 734, 99, 601, 3539, 444, 274, 195, 346, 3254, 322, 322, 322, 193, 231, 292, 238, 228, 612, 50, 321, 3836, 546, 11, 2057, 493, 476, 1923, 247, 3605, 173, 536, 3132, 3132, 174, 336, 336, 579, 266, 4085, 99, 4030, 65, 54, 310, 214, 425, 706, 287, 117, 362, 1206, 110, 506, 225, 2891, 4, 522, 357, 238, 107, 99, 99, 99, 99, 270, 493, 214, 124, 823, 361, 481, 184, 260, 260, 421, 429, 503, 98, 3966, 88, 1519, 686, 669, 744, 79, 409, 362, 362, 411, 1206, 740, 126, 541, 392, 418, 193, 522, 231, 4, 4, 4, 168, 419, 728, 50, 285, 411, 411, 533, 155, 10, 155, 335, 721, 94, 167, 479, 479, 441, 292, 719, 522, 522, 522, 522, 522, 357, 380, 481, 83, 137, 1531, 1923, 88, 231, 231, 504, 497, 2592, 558, 452, 141, 488, 562, 539, 740, 354, 98, 80, 411, 50, 50, 1228, 358, 121, 1091, 425, 1073, 4022, 1593, 4030, 585, 179, 399, 399, 489, 394, 245, 2437, 92, 289, 94, 351, 383, 515, 183, 183, 383, 210, 427, 256, 755, 99, 22, 1037, 122, 122, 122, 122, 122, 122, 122, 232, 623, 600, 193, 3006, 522, 522, 357, 164, 243, 3928, 587, 545, 658, 658], "source_text": ": What percentage of the Earth's surface is covered by oceans?", "source_wav": "/home/v-wenxichen/data/debug/slam-omni_jsonl-en/audio/audio_1.wav", "target_text": "About 71% of the Earth's surface is covered by oceans."} +{"split_name": "identity", "index": 2, "round": 2, "target_token": [745, 545, 203, 495, 1519, 179, 188, 275, 612, 575, 603, 355, 570, 714, 193, 193, 193, 231, 292, 135, 399, 481, 350, 429, 322, 322, 694, 231, 168, 245, 50, 50, 50, 489, 489, 481, 613, 253, 348, 824, 528, 22, 86, 2723, 199, 686, 83, 83, 755, 755, 188, 188, 240, 240, 240, 1923, 1938, 135, 135, 135, 135, 135, 135, 346, 135, 346, 429, 231, 193, 522, 193, 3006, 412, 623, 1065, 646, 75, 75, 230, 440, 491, 545, 545, 658, 3158, 3158, 583, 131, 205, 1531, 1519, 172, 83, 399, 399, 184, 569, 569, 2437, 112, 50, 476, 515, 699, 3965, 515, 383, 141, 11, 188, 270, 122, 122, 232, 376, 239, 3006, 357, 391, 706, 1091, 309, 49, 639, 3575, 77, 471, 355, 107, 1923, 196, 399, 399, 399, 2031, 39, 39, 175, 498, 576, 119, 321, 289, 2185, 1006, 644, 2645, 603, 676, 482, 1529, 258, 554, 737, 48, 129, 576, 3094, 354, 362, 80, 740, 181, 181, 372, 571, 372, 163, 163, 163, 163, 590, 528, 481, 184, 479, 2031, 456, 824, 687, 1405, 576, 362, 1206, 198, 745, 514, 3605, 353, 2712, 448, 484, 495, 629, 419, 623, 164, 528, 892, 1938, 196, 39, 823, 187, 2057, 3350, 43, 824, 253, 1104, 3108, 14, 1531, 157, 2344, 744, 489, 489, 409, 409, 409, 362, 411, 50, 11, 639, 392, 382, 322, 193, 193, 231, 504, 558, 144, 263, 263, 274, 2691, 77, 579, 333, 312, 620, 261, 3480, 647, 190, 467, 54, 1073, 574, 2844, 332, 184, 481, 85, 204, 70, 57, 355, 22, 376, 417, 382, 694, 231, 518, 292, 441, 289, 130, 42, 1091, 132, 706, 371, 77, 655, 129, 80, 411, 50, 187, 100, 540, 3466, 3052, 87, 255, 1104, 310, 672, 336, 163, 590, 82, 2644, 15, 2503, 585, 89, 702, 2712, 131, 155, 121, 649, 649, 312, 620, 620, 526, 38, 606, 606, 606, 80, 80, 3, 42, 309, 67, 532, 337, 2691, 77, 462, 364, 2844, 647, 647, 99, 3480, 4085, 296, 613, 20, 99, 2844, 254, 302, 481, 204, 334, 1089, 189, 823, 597, 299, 237, 1479, 710, 658, 279, 230, 230, 230, 230, 230, 140, 442, 491, 587, 1651, 545, 658, 106, 215, 401, 2503, 1938, 1214, 493, 596, 67, 1923, 690, 50, 411, 409, 501, 525, 525, 186, 550, 444, 444, 737, 737, 268, 714, 231, 193, 231, 518, 570, 72, 2437, 277, 609, 489, 184, 479, 39, 2514, 370, 2712, 124, 473, 558, 2386, 498, 409, 198, 690, 3605, 3605, 543, 204, 339, 382, 322, 694, 231, 518, 452, 511, 365, 129, 80, 411, 50, 721, 383, 455, 23, 1852, 518, 292, 1529, 419, 628, 3966, 639, 216, 299, 478, 640, 478, 452, 632, 419, 513, 89, 89, 89, 513, 135, 265, 154, 498, 498, 378, 498, 323, 594, 456, 396, 584, 2057, 427, 669, 377, 91, 230, 230, 230, 230, 230, 491, 1651, 587, 2130, 545, 658, 1849, 1849, 70, 70, 2844, 7, 2782, 1397, 493, 476, 514, 50, 173, 173, 235, 1065, 153, 183, 515, 127, 205, 557, 242, 468, 468, 1519, 686, 399, 399, 184, 228, 4030, 351, 38, 253, 130, 359, 1206, 198, 539, 2579, 210, 2712, 282, 558, 263, 511, 354, 362, 1354, 1354, 39, 196, 2031, 2031, 39, 475, 407, 383, 3965, 3965, 515, 383, 452, 546, 107, 351, 122, 253, 709, 119, 3836, 49, 155, 511, 80, 80, 409, 489, 2330, 198, 644, 1593, 569, 892, 570, 719, 522, 3006, 357, 107, 597, 290, 290, 214, 484, 468, 388, 242, 343, 114, 414, 467, 3108, 65, 326, 2844, 2844, 593, 9, 2297, 122, 1037, 1037, 122, 1037, 122, 695, 1942, 600, 3006, 694, 231, 522, 522, 239, 3966, 377, 545, 646, 279, 440, 230], "source_text": "USER: What percentage of the Earth's surface is covered by oceans? ASSISTANT: About 71% of the Earth's surface is covered by oceans. : Wow, that's a lot of water! No wonder we call it the blue planet.", "source_wav": "/home/v-wenxichen/data/debug/slam-omni_jsonl-en/audio/audio_2.wav", "target_text": "Yes, it certainly is! The oceans play a vital role in regulating the Earth's climate and supporting life on our planet. And they're also a great source of food, energy, and recreation for us humans!"} +{"split_name": "identity", "index": 2, "round": 3, "target_token": [1593, 545, 373, 70, 580, 539, 2645, 107, 3827, 714, 193, 694, 3966, 102, 1923, 205, 55, 481, 204, 70, 2752, 2782, 2989, 104, 104, 104, 104, 575, 275, 601, 426, 2644, 590, 548, 1700, 623, 528, 462, 364, 16, 99, 507, 3466, 87, 298, 1988, 274, 581, 731, 690, 1531, 366, 153, 183, 618, 287, 119, 467, 3108, 1073, 550, 550, 312, 312, 1460, 2644, 223, 2386, 590, 1006, 267, 51, 163, 163, 163, 163, 195, 85, 435, 652, 668, 668, 1460, 1593, 2782, 519, 290, 214, 408, 645, 612, 740, 533, 98, 3966, 232, 122, 38, 2503, 4030, 2579, 411, 511, 3836, 533, 182, 411, 50, 34, 339, 1089, 558, 263, 213, 471, 333, 579, 312, 568, 2828, 4085, 38, 55, 584, 363, 618, 36, 433, 643, 419, 503, 123, 102, 683, 594, 130, 1091, 1091, 706, 3052, 2514, 468, 468, 1307, 1307, 328, 160, 648, 2712, 710, 153, 183, 618, 515, 226, 489, 609, 195, 80, 80, 411, 50, 742, 618, 23, 1852, 194, 688, 212, 371, 119, 389, 414, 537, 577, 138, 2844, 2844, 338, 3, 3108, 467, 740, 355, 2297, 824, 232, 88, 283, 551, 163, 83, 204, 184, 257, 547, 245, 387, 1930, 47, 476, 515, 515, 3965, 3965, 183, 383, 546, 256, 57, 254, 364, 9, 84, 122, 122, 437, 84, 2297, 653, 430, 583, 545, 279, 440, 440, 440, 440, 75, 440, 440, 440, 2444, 436, 530, 545, 658, 1091, 358, 3966, 570, 569, 674, 100, 358, 42, 1091, 309, 67, 728, 585, 475, 3605, 247, 104, 292, 719, 231, 1935, 3254, 502, 12, 238, 603, 601, 601, 352, 633, 824, 824, 389, 64, 2299, 745, 184, 83, 83, 293, 468, 468, 370, 1354, 175, 594, 452, 455, 4, 522, 3006, 193, 518, 345, 1923, 603, 603, 603, 603, 603, 228, 228, 355, 355, 260, 228, 374, 309, 2057, 481, 184, 1423, 1423, 668, 568, 1460, 338, 219, 469, 475, 585, 574, 411, 467, 65, 226, 3965, 3965, 565, 515, 183, 506, 473, 50, 50, 207, 207, 365, 365, 411, 198, 1206, 2330, 489, 399, 256, 116, 2592, 600, 1935, 518, 238, 72, 2031, 286, 74, 1479, 119, 655, 314, 4022, 539, 4030, 493, 493, 226, 50, 411, 359, 337, 155, 371, 511, 213, 590, 590, 163, 384, 528, 394, 302, 400, 707, 282, 558, 357, 231, 357, 688, 226, 1091, 425, 706, 79, 422, 79, 173, 411, 411, 411, 173, 1354, 422, 388, 388, 138, 2844, 254, 364, 9, 180, 180, 19, 710, 2280, 123, 322, 322, 193, 193, 193, 522, 3006, 357, 518, 126, 149, 677, 658, 463, 2444, 440, 440, 440, 230, 440, 440, 440, 230, 230, 230, 230, 230, 646, 646, 545, 545, 545, 395, 382, 127, 205, 179, 1519, 686, 83, 399, 184, 245, 569, 2437, 387, 1930, 690, 226, 515, 515, 515, 699, 383, 546, 281, 188, 240, 3406, 298, 38, 215, 204, 1923, 419, 243, 231, 231, 96, 72, 585, 101, 130, 42, 425, 371, 3575, 224, 77, 462, 260, 1938, 196, 475, 89, 340, 719, 522, 231, 518, 292, 238, 1938, 537, 359, 371, 280, 3821, 263, 117, 101, 1206, 190, 690, 3605, 3605, 247, 3158, 339, 1091, 1091, 42, 706, 473, 3132, 468, 242, 1354, 3950, 89, 2057, 596, 823, 1531, 1519, 468, 328, 686, 686, 399, 630, 755, 228, 374, 824, 54, 467, 3108, 591, 574, 86, 114, 340, 719, 4, 4, 4, 688, 727, 450, 506, 506, 110, 409, 362, 362, 323, 445, 422, 56, 2723, 199, 199, 2723, 2454, 43, 417, 119, 377, 545, 646, 230, 491, 440, 440, 440, 440, 440, 230, 440, 2444, 2444, 463, 545, 545, 583, 395, 533, 155, 337, 10, 356, 3966, 118, 258, 554, 554, 33, 33, 554, 258, 258, 457, 171, 337, 10, 155, 640, 640, 541, 238, 228, 228, 3290, 601, 2645, 170, 627, 714, 193, 231, 518, 292, 238, 340, 493, 38, 1923, 469, 475, 475, 89, 1228, 168, 495, 2514, 310, 87, 253, 484, 282, 558, 287, 287, 287, 473, 1354, 1354, 2514, 160, 450, 1065, 153, 183, 699, 210, 351, 79, 1206, 110, 110, 409, 1206, 1206, 594, 159, 455, 1852, 357, 346, 481, 184, 57, 755, 340, 253, 342, 141, 408, 214, 408, 407, 1923, 1923, 187, 425, 14, 310, 270, 253, 87, 1988, 565, 515, 506, 287, 335, 511, 344, 198, 453, 190, 1938, 135, 361, 183, 515, 287, 591, 1206, 1206, 110, 409, 409, 1206, 594, 135, 346, 903, 903, 688, 226, 371, 1091, 425, 3, 289, 273, 2031, 39, 196, 187, 2057, 138, 388, 253, 253, 55, 214, 584, 363, 455, 427, 175, 101, 130, 358, 706, 422, 157, 160, 1531, 750, 2992, 205, 86, 135, 340, 719, 522, 231, 12, 504, 1065, 733, 90, 2503, 297, 601, 1593, 284, 289, 3094, 533, 709, 409, 409, 409, 409, 409, 498, 110, 110, 492, 1228, 375, 584, 618, 452, 345, 345, 373, 545, 545, 530, 679, 230, 440, 440, 440, 440, 230, 230, 230, 230, 140, 1651, 545, 545, 545, 658, 334, 293, 630, 1923, 2989, 318, 119, 124, 584, 386, 448, 214, 11, 275, 275, 275, 676, 100, 298, 3406, 309, 706, 690, 1519, 1519, 690, 2386, 42, 42, 132, 387, 2437, 2437, 118, 554, 457, 570, 337, 382, 412, 464, 441, 399, 399, 286, 557, 407, 518, 194, 522, 522, 12, 504, 706, 1073, 365, 129, 80, 411, 1206, 226, 153, 23, 357, 518, 292, 441, 734, 289, 337, 337, 704, 321, 3132, 310, 336, 1596, 568, 261, 3480, 38, 493, 493, 493, 187, 2057, 540, 1206, 1206, 409, 409, 409, 2330, 489, 489, 70, 519, 99, 755, 38, 824, 584, 2057, 1938, 190, 79, 1206, 359, 354, 362, 198, 606, 80, 467, 3108, 65, 181, 550, 333, 568, 261, 3480, 11, 188, 476, 87, 1988, 226, 173, 1206, 1206, 409, 409, 489, 2330, 453, 284, 284, 346, 337, 392, 490, 205, 1531, 399, 399, 489, 394, 195, 4022, 4022, 444, 444, 97, 170, 58, 193, 3006, 231, 518, 292, 1529, 257, 467, 3108, 467, 119, 67, 141, 488, 488, 141, 562, 601, 152, 1593, 1593, 2299, 64, 365, 80, 80, 409, 409, 2330, 2330, 489, 453, 326, 209, 119, 317, 637, 232, 122, 38, 755, 138, 388, 475, 551, 406, 318, 2712, 544, 153, 183, 515, 383, 427, 79, 1206, 537, 409, 409, 409, 409, 409, 606, 682, 287, 75, 545, 545, 545, 368, 279], "source_text": "USER: What percentage of the Earth's surface is covered by oceans? ASSISTANT: About 71% of the Earth's surface is covered by oceans. USER: Wow, that's a lot of water! No wonder we call it the blue planet. ASSISTANT: Yes, it certainly is! The oceans play a vital role in regulating the Earth's climate and supporting life on our planet. And they're also a great source of food, energy, and recreation for us humans! : Absolutely! I love visiting the beach and going for a swim in the ocean. It's amazing how vast and powerful the sea can be, yet also so peaceful and calming.", "source_wav": "/home/v-wenxichen/data/debug/slam-omni_jsonl-en/audio/audio_3.wav", "target_text": "As an AI language model, I have never gone to the beach or swam in the ocean, but based on what you said, I am sure it's a wonderful experience. The ocean is a place of great beauty and mystery, full of fascinating creatures and hidden treasures waiting to be discovered. It can be both a source of wonder and a reminder of the awesome power of nature."} +{"split_name": "identity", "index": 4, "round": 1, "target_token": [284, 545, 658, 1423, 580, 401, 539, 177, 107, 419, 243, 694, 694, 231, 380, 513, 1923, 232, 1037, 481, 630, 2752, 2989, 2989, 551, 104, 104, 188, 275, 539, 539, 539, 507, 2503, 1938, 346, 1700, 623, 528, 462, 364, 16, 507, 3055, 88, 87, 310, 149, 389, 423, 133, 690, 1531, 366, 2992, 183, 618, 36, 130, 467, 3108, 656, 550, 312, 333, 312, 186, 739, 2644, 741, 527, 590, 590, 163, 163, 163, 163, 51, 195, 1423, 435, 668, 426, 1593, 15, 228, 823, 1531, 596, 168, 1531, 86, 338, 568, 3945, 207, 575, 165, 358, 584, 1531, 141, 408, 11, 645, 645, 297, 245, 359, 98, 609, 394, 1423, 394, 223, 569, 258, 556, 42, 1089, 189, 450, 363, 345, 1923, 196, 196, 196, 1531, 361, 187, 1531, 605, 89, 187, 596, 137, 83, 630, 7, 514, 362, 467, 65, 98, 3836, 155, 287, 335, 2579, 411, 80, 467, 467, 198, 90, 2031, 39, 1519, 495, 383, 699, 3965, 515, 383, 36, 256, 755, 107, 493, 122, 180, 481, 1423, 426, 2644, 2644, 2644, 462, 467, 3108, 528, 355, 574, 210, 124, 211, 733, 506, 287, 335, 117, 411, 354, 80, 606, 2579, 514, 281, 205, 180, 85, 520, 1405, 467, 3108, 656, 186, 186, 80, 80, 411, 316, 2712, 124, 733, 5, 427, 218, 399, 481, 707, 3254, 322, 694, 193, 522, 518, 292, 238, 107, 38, 122, 296, 228, 16, 606, 186, 186, 80, 80, 80, 80, 409, 1206, 79, 244, 157, 605, 25, 479, 516, 57, 297, 547, 424, 59, 267, 21, 413, 413, 414, 126, 316, 669, 583, 646, 679, 440, 440, 491, 491, 491, 491, 491, 491, 491, 491, 491, 545, 545, 545, 583, 583, 646, 75, 75, 488, 562, 539, 569, 118, 163, 203, 1381, 1382, 4030, 275, 612, 50, 50, 98, 3836, 704, 411, 411, 411, 409, 609, 489, 609, 409, 198, 284, 497, 124, 263, 263, 274, 224, 77, 655, 129, 80, 411, 50, 1228, 226, 11, 647, 3466, 406, 87, 567, 2592, 363, 383, 210, 427, 79, 1206, 576, 3094, 533, 362, 445, 422, 3605, 104, 518, 193, 231, 357, 292, 240, 201, 1037, 232, 116, 694, 231, 518, 597, 2302, 1065, 153, 391, 135, 167, 2057, 450, 189, 823, 597, 141, 238, 188, 240, 412, 193, 231, 1935, 3254, 3254, 903, 452, 256, 167, 286, 422, 450, 2592, 558, 504, 194, 522, 522, 357, 361, 481, 435, 204, 70, 70, 2844, 7, 99, 603, 238, 298, 3406, 464, 184, 204, 70, 507, 16, 16, 338, 3823, 1593, 1073, 119, 1091, 1091, 317, 3966, 656, 590, 590, 163, 163, 163, 163, 51, 311, 195, 1423, 673, 70, 70, 99, 2503, 3827, 493, 38, 546, 433, 512, 512, 512, 186, 186, 3539, 539, 275, 281, 3605, 585, 399, 489, 394, 284, 2299, 181, 80, 80, 411, 409, 409, 2330, 2330, 558, 2437, 556, 824, 3108, 467, 656, 305, 2644, 946, 22, 310, 122, 1104, 633, 1405, 346, 86, 424, 130, 3108, 3108, 656, 305, 471, 258, 732, 171, 434, 719, 194, 357, 3254, 618, 287, 1091, 317, 706, 531, 512, 186, 186, 444, 285, 1073, 42, 425, 706, 427, 3466, 1923, 439, 548, 554, 892, 2579, 173, 411, 287, 515, 2891, 23, 903, 391, 482, 358, 467, 65, 181, 186, 285, 198, 409, 489, 609, 982, 1405, 467, 65, 579, 564, 564, 564, 529, 1073, 493, 310, 1405, 42, 132, 362, 1206, 79, 79, 577, 2752, 2844, 99, 364, 364, 22, 1214, 122, 613, 1935, 12, 504, 287, 602, 1091, 208, 132, 3, 129, 80, 411, 285, 285, 737, 737, 554, 548, 366, 391, 117, 377, 3132, 1381, 51, 550, 550, 550, 739, 739, 547, 547, 547, 297, 516, 86, 399, 399, 399, 2057, 1228, 396, 596, 340, 345, 140, 545, 583, 583, 658, 1651], "source_text": ": What is the most popular smartphone brand these days?", "source_wav": "/home/v-wenxichen/data/debug/slam-omni_jsonl-en/audio/audio_4.wav", "target_text": "As an AI language model, I don't have updated information on the current market scenario. However, according to recent statistics, Samsung, Apple, and Huawei are among the most popular smartphone brands worldwide."} +{"split_name": "identity", "index": 4, "round": 2, "target_token": [745, 545, 658, 70, 401, 539, 2645, 107, 340, 714, 193, 518, 102, 1923, 2057, 493, 38, 479, 630, 630, 755, 2752, 469, 283, 551, 551, 188, 603, 202, 3945, 3945, 385, 385, 11, 188, 240, 346, 623, 528, 462, 4085, 364, 2844, 647, 43, 253, 310, 274, 563, 74, 1206, 400, 1942, 2592, 153, 183, 287, 720, 130, 467, 65, 181, 333, 333, 333, 568, 4022, 547, 223, 482, 556, 424, 590, 590, 163, 163, 163, 384, 195, 394, 85, 1423, 426, 15, 228, 179, 2514, 2712, 124, 584, 5, 214, 408, 519, 1923, 493, 122, 38, 72, 261, 568, 3945, 3539, 97, 575, 482, 220, 168, 141, 210, 141, 11, 645, 344, 284, 704, 3836, 124, 1091, 132, 316, 287, 117, 411, 198, 1206, 742, 506, 699, 2891, 4, 357, 490, 377, 14, 613, 22, 336, 336, 336, 271, 195, 394, 2437, 387, 2386, 42, 309, 67, 335, 210, 214, 407, 283, 283, 310, 87, 87, 1104, 348, 472, 89, 86, 107, 1037, 122, 122, 122, 623, 1935, 12, 361, 609, 85, 129, 129, 525, 525, 80, 354, 358, 309, 704, 213, 2185, 550, 444, 444, 3539, 1593, 539, 2503, 86, 86, 86, 479, 441, 9, 518, 4, 231, 194, 502, 1065, 618, 227, 212, 117, 389, 655, 129, 80, 409, 1206, 361, 309, 600, 1935, 168, 481, 481, 184, 70, 99, 755, 38, 467, 240, 551, 1307, 1354, 65, 358, 42, 704, 3, 80, 409, 2330, 79, 577, 388, 138, 2844, 2844, 2844, 364, 364, 364, 364, 9, 2297, 122, 122, 84, 596, 168, 452, 632, 1065, 545, 646, 440, 440, 230, 440, 230, 440, 440, 491, 440, 230, 230, 491, 545, 658, 6, 75, 488, 531, 562, 152, 601, 471, 163, 174, 48, 118, 569, 612, 50, 411, 3836, 704, 511, 50, 50, 50, 731, 282, 525, 195, 181, 889, 889, 889, 889, 737, 737, 554, 337, 10, 2280, 597, 506, 287, 117, 591, 690, 3466, 1307, 1307, 50, 3, 42, 309, 704, 117, 411, 1206, 79, 422, 3055, 2752, 2844, 99, 16, 364, 22, 22, 253, 122, 14, 600, 694, 231, 357, 361, 358, 467, 65, 4022, 1460, 3823, 16, 2844, 593, 122, 38, 86, 57, 102, 168, 231, 193, 357, 137, 83, 686, 353, 468, 1519, 160, 234, 719, 231, 1852, 597, 292, 256, 107, 38, 253, 2297, 399, 204, 70, 57, 3290, 16, 261, 261, 3539, 1073, 425, 42, 1091, 317, 224, 656, 590, 590, 590, 163, 51, 163, 163, 349, 672, 224, 443, 436, 2444, 279, 230, 440, 440, 230, 440, 440, 440, 442, 646, 140, 373, 710, 710, 673, 70, 99, 4030, 1214, 14, 3836, 356, 10, 143, 704, 213, 444, 444, 3539, 385, 275, 281, 50, 425, 470, 581, 3945, 3539, 202, 20, 647, 755, 1214, 38, 86, 219, 238, 220, 231, 231, 231, 518, 96, 609, 394, 4022, 4022, 444, 186, 186, 224, 155, 3836, 704, 511, 80, 362, 1206, 456, 375, 823, 391, 244, 79, 226, 155, 155, 155, 706, 721, 2579, 215, 180, 306, 116, 10, 10, 3836, 287, 95, 1519, 328, 104, 681, 544, 189, 153, 183, 383, 427, 79, 1206, 1206, 409, 362, 1206, 101, 137, 4, 1852, 357, 340, 481, 184, 355, 99, 107, 493, 253, 687, 42, 317, 374, 734, 228, 228, 603, 38, 493, 1923, 674, 621, 127, 10, 131, 640, 452, 335, 167, 479, 479, 479, 340, 361, 2712, 1261, 382, 322, 694, 694, 96, 481, 481, 184, 228, 228, 1923, 340, 366, 124, 584, 12, 1531, 286, 740, 101, 3836, 155, 316, 335, 721, 606, 409, 477, 1405, 377, 309, 704, 371, 117, 511, 186, 186, 644, 539, 259, 2989, 345, 193, 193, 231, 4, 452, 707, 1091, 42, 67, 511, 389, 390, 535, 388, 312, 2828, 537, 577, 138, 138, 137, 192, 19, 3821, 3821, 2280, 382, 3006, 3006, 193, 193, 193, 231, 193, 522, 357, 1479, 377, 646, 230, 230, 440, 230, 230, 230, 230, 230, 230, 230, 230, 491, 140, 230, 491, 373, 710, 3821, 19, 215, 399, 196, 1923, 1942, 187, 399, 399, 223, 471, 444, 737, 554, 3966, 1261, 12, 518, 292, 511, 470, 54, 581, 281, 676, 479, 394, 164, 164, 1104, 528, 419, 629, 551, 353, 1065, 211, 733, 5, 214, 141, 217, 202, 2644, 738, 467, 54, 253, 613, 648, 4, 357, 3254, 502, 168, 1923, 755, 2503, 99, 16, 261, 568, 568, 2644, 22, 253, 596, 823, 363, 237, 210, 427, 370, 370, 370, 445, 79, 1206, 409, 3, 119, 42, 317, 371, 371, 511, 411, 198, 1206, 742, 618, 455, 2891, 194, 688, 464, 232, 122, 613, 738, 535, 314, 130, 42, 317, 706, 316, 335, 335, 198, 285, 50, 117, 3836, 155, 155, 155, 511, 721, 2579, 684, 310, 342, 2280, 3006, 231, 518, 292, 102, 135, 135, 135, 135, 429, 1935, 1935, 168, 346, 479, 184, 177, 401, 409, 576, 1405, 359, 409, 409, 409, 2386, 42, 282, 274, 371, 224, 77, 213, 444, 285, 539, 275, 514, 50, 50, 50, 411, 80, 65, 467, 467, 198, 401, 2844, 2844, 107, 1037, 1037, 192, 750, 1261, 600, 3006, 3006, 193, 193, 231, 357, 490, 377, 1357, 75, 75, 440, 440, 440, 440, 230, 230, 230, 279, 2444, 436, 262, 710, 473, 3158, 3132, 528, 462, 569, 676, 2989, 2514, 119, 544, 544, 584, 5, 448, 214, 214, 427, 188, 507, 2579, 198, 65, 54, 132, 411, 285, 285, 359, 28, 263, 274, 224, 371, 77, 213, 181, 444, 739, 4022, 471, 305, 163, 163, 528, 528, 441, 107, 361, 187, 135, 605, 157, 157, 157, 172, 137, 119, 189, 168, 340, 1531, 135, 102, 239, 1935, 193, 518, 570, 72, 2644, 568, 333, 712, 712, 568, 261, 20, 2844, 647, 684, 122, 55, 430, 600, 694, 193, 231, 518, 570, 569, 1593, 444, 444, 97, 285, 224, 98, 704, 650, 212, 212, 287, 321, 563, 74, 94, 79, 79, 2330, 409, 409, 198, 409, 117, 668, 186, 186, 186, 1593, 285, 1073, 309, 42, 317, 706, 511, 411, 411, 1206, 1206, 79, 286, 2057, 1228, 187, 1531, 138, 55, 55, 682, 46, 193, 522, 3006, 231, 518, 240, 419, 69, 201, 412, 4, 522, 357, 3254, 1065, 502, 168, 238, 107, 364, 364, 547, 467, 3108, 3108, 742, 302, 57, 70, 3290, 107, 38, 122, 2297, 481, 580, 426, 2644, 569, 72, 116, 96, 1938, 101, 1206, 362, 2386, 3836, 155, 700, 452, 632, 645, 2645, 601, 152, 385, 539, 2579, 473, 544, 473, 334, 1089, 584, 618, 351, 79, 409, 110, 409, 498, 498, 110, 445, 722, 417, 648, 23, 1852, 522, 357, 292, 1479, 377, 646, 279, 440, 440, 230, 230, 230, 230, 230, 230, 230, 1651, 545, 545, 583, 1091, 6, 823, 363, 383, 427, 690, 629, 203, 1065, 153, 183, 699, 383, 225, 427, 3605, 690, 499, 483, 483, 483, 483, 403, 315, 231, 322, 382, 10, 3966, 1397, 514, 362, 359, 98, 80, 80, 186, 1593, 539, 218, 179, 179, 686, 399, 162, 395, 1091, 1091, 132, 511, 362, 1206, 2579, 577, 577, 647, 20, 20, 4085, 16, 257, 493, 342, 702, 481, 399, 184, 228, 107, 374, 824, 824, 467, 3108, 467, 656, 333, 333, 333, 568, 568, 2644, 471, 3, 132, 889, 590, 163, 163, 576, 3094, 623, 72, 72, 479, 481, 707, 584, 704, 98, 3836, 49, 36, 259, 1923, 340, 366, 2592, 502, 3006, 1852, 4, 597, 455, 450, 3052, 1354, 362, 1206, 3052, 493, 824, 232, 318, 348, 468, 468, 468, 388, 388, 88, 88, 715, 119, 382, 694, 600, 412, 340, 481, 184, 57, 228, 107, 38, 253, 1405, 119, 425, 1073, 284, 1593, 1593, 569, 1938, 1531, 366, 2992, 183, 452, 256, 167, 167, 479, 135, 481, 204, 1849, 1089, 1089, 823, 168, 452, 345, 373, 545, 545, 545, 545], "source_text": "USER: What is the most popular smartphone brand these days? ASSISTANT: As an AI language model, I don't have updated information on the current market scenario. However, according to recent statistics, Samsung, Apple, and Huawei are among the most popular smartphone brands worldwide. : Interesting! Among these three brands, which one do you think has the most features and benefits for its price?", "source_wav": "/home/v-wenxichen/data/debug/slam-omni_jsonl-en/audio/audio_5.wav", "target_text": "As an AI language model, I cannot have personal opinions or bias towards any brand. However, all three brands - Samsung, Apple, and Huawei - offer different features and benefits at different price points. It ultimately comes down to your personal preferences and requirements, like camera quality, design, software, operating system, and other factors, to choose the right brand and model that fits your needs and budget."} +{"split_name": "identity", "index": 9, "round": 1, "target_token": [745, 530, 545, 3132, 495, 468, 1519, 196, 2503, 4030, 2645, 601, 601, 202, 434, 337, 193, 193, 193, 522, 522, 522, 357, 361, 85, 192, 1405, 3108, 3108, 65, 550, 312, 568, 261, 3480, 188, 476, 3406, 226, 10, 10, 155, 704, 723, 737, 737, 48, 174, 389, 650, 389, 655, 372, 181, 186, 186, 186, 285, 117, 733, 5, 5, 214, 571, 475, 388, 310, 253, 55, 342, 733, 5, 214, 427, 519, 755, 190, 467, 3108, 396, 706, 79, 468, 468, 269, 269, 269, 27, 269, 468, 1519, 557, 605, 605, 513, 3950, 3950, 102, 518, 194, 357, 2302, 903, 452, 256, 184, 260, 441, 9, 231, 231, 490, 608, 479, 39, 422, 706, 155, 10, 10, 704, 371, 389, 390, 655, 181, 186, 606, 80, 65, 54, 65, 740, 284, 416, 477, 337, 10, 287, 396, 467, 467, 453, 3290, 228, 456, 187, 479, 479, 137, 42, 584, 12, 292, 214, 427, 218, 475, 3605, 247, 104, 45, 3965, 3965, 3965, 183, 383, 345, 308, 89, 86, 107, 38, 122, 253, 374, 489, 195, 525, 525, 80, 411, 1206, 742, 618, 903, 23, 23, 391, 596, 3821, 226, 287, 445, 445, 79, 409, 409, 409, 1206, 456, 450, 727, 565, 515, 212, 511, 2437, 590, 64, 130, 42, 425, 706, 117, 335, 539, 152, 152, 385, 385, 11, 510, 339, 339, 1089, 189, 823, 452, 205, 479, 302, 302, 608, 9, 518, 522, 522, 522, 193, 522, 193, 3006, 418, 357, 357, 464, 1479, 658, 658, 440, 230, 646, 440, 230, 230, 491, 230, 440, 230, 230, 230, 230, 53, 646, 373, 1651, 658, 658, 583, 3158, 205, 196, 2989, 187, 38, 629, 281, 540, 103, 32, 1405, 98, 3836, 2299, 2299, 444, 444, 444, 40, 97, 737, 97, 554, 412, 231, 903, 618, 506, 1091, 42, 706, 117, 411, 1206, 422, 310, 87, 495, 475, 475, 551, 310, 87, 270, 481, 481, 394, 59, 64, 655, 1479, 119, 563, 74, 79, 422, 79, 79, 1206, 1206, 198, 351, 493, 493, 2057, 574, 219, 419, 238, 518, 231, 522, 3006, 1935, 96, 481, 204, 70, 99, 2782, 374, 467, 1397, 219, 585, 513, 366, 1261, 12, 452, 141, 141, 197, 519, 2844, 593, 122, 55, 1261, 153, 183, 515, 452, 141, 256, 86, 593, 55, 417, 584, 363, 688, 210, 3052, 56, 300, 300, 85, 172, 370, 370, 1307, 104, 203, 124, 696, 700, 515, 699, 455, 4, 4, 522, 357, 1065, 1261, 12, 238, 1938, 228, 177, 15, 338, 644, 453, 282, 195, 443, 658, 3821, 375, 67, 36, 497, 669, 545, 440, 230, 75, 440, 440, 75, 75, 75, 646, 146, 440, 2444, 2444, 221, 1019, 443, 121, 329, 640, 356, 143, 704, 213, 2299, 2299, 732, 528, 2712, 211, 558, 427, 281, 240, 292, 193, 193, 231, 518, 292, 240, 1923, 2989, 495, 87, 406, 593, 673, 426, 568, 97, 1073, 54, 824, 3966, 72, 257, 639, 541, 131, 640, 465, 36, 206, 1938, 419, 102, 193, 694, 231, 518, 570, 892, 346, 1065, 263, 665, 528, 554, 554, 554, 149, 714, 193, 193, 193, 518, 292, 603, 603, 107, 307, 253, 232, 382, 322, 322, 694, 518, 238, 228, 2031, 286, 400, 3142, 699, 3965, 515, 618, 452, 546, 107, 22, 122, 122, 613, 600, 1935, 518, 102, 135, 479, 107, 38, 824, 122, 14, 72, 223, 130, 396, 309, 132, 181, 4022, 1460, 186, 4022, 1460, 1460, 1460, 2828, 228, 187, 374, 479, 25, 2723, 2723, 199, 199, 199, 523, 417, 430, 119, 583, 646, 646, 230, 230, 646, 440, 230, 230, 230, 230, 140, 491, 327, 545, 658, 710, 19, 215, 645, 755, 1938, 38, 54, 824, 358, 425, 591, 557, 1519, 472, 472, 475, 475, 310, 253, 253, 296, 263, 155, 155, 10, 356, 143, 704, 656, 118, 554, 164, 548, 554, 457, 240, 3605, 50, 581, 42, 309, 706, 287, 117, 335, 198, 539, 338, 260, 441, 412, 694, 231, 518, 238, 107, 107, 1037, 523, 192, 481, 83, 630, 593, 479, 340, 232, 122, 824, 96, 72, 1073, 467, 3108, 1073, 305, 305, 333, 579, 579, 535, 656, 3108, 3108, 65, 453, 516, 254, 57, 593, 180, 180, 19, 710, 658, 1089, 584, 903, 688, 237, 237, 637, 530, 677, 658, 545, 545, 463, 2444, 646, 440, 463, 463, 463, 1651, 491, 646, 646, 1651, 658, 545, 545, 658, 658, 658, 221, 470, 467, 467, 190, 2503, 2782, 755, 1923, 38, 240, 247, 247, 247, 104, 403, 231, 193, 4, 363, 226, 42, 309, 706, 173, 690, 173, 411, 411, 411, 411, 173, 450, 727, 515, 183, 383, 433, 213, 889, 163, 490, 2992, 618, 506, 287, 287, 511, 1206, 594, 750, 391, 86, 585, 281, 188, 240, 699, 3965, 3965, 183, 383, 452, 546, 256, 20, 107, 22, 122, 122, 122, 122, 1037, 122, 122, 14, 412, 1935, 357, 357, 361, 481, 481, 630, 755, 3055, 55, 253, 376, 650, 263, 263, 337, 371, 371, 623, 30, 483, 343, 2057, 187, 89, 3055, 2514, 376, 55, 376, 695, 119, 706, 359, 498, 30, 483, 281, 100, 584, 391, 629, 281, 240, 102, 412, 193, 231, 518, 96, 441, 260, 516, 101, 3108, 467, 740, 489, 184, 57, 57, 107, 232, 122, 84, 119, 823, 340, 479, 1, 177, 539, 601, 612, 112, 732, 207, 325, 529, 624, 135, 102, 357, 193, 231, 518, 22, 441, 107, 516, 4085, 284, 467, 3108, 3108, 3108, 32, 296, 546, 1065, 530, 75, 230, 230, 230, 646, 491, 491, 230, 230, 230, 491, 373, 658, 658, 658, 334, 373, 630, 630, 585, 585, 87, 255, 546, 510, 558, 263, 371, 224, 77, 737, 285, 285, 411, 581, 395, 425, 704, 371, 511, 411, 411, 1206, 2579, 690, 3055, 56, 707, 321, 1479, 3132, 415, 1381, 64, 181, 186, 186, 1593, 595, 497, 211, 5, 214, 484, 406, 310, 41, 41, 253, 467, 1073, 190, 2503, 228, 187, 100, 1938, 89, 137, 211, 168, 597, 141, 427, 407, 218, 3055, 475, 179, 1531, 618, 699, 3965, 183, 383, 452, 256, 1938, 99, 20, 122, 122, 613, 184, 204, 673, 401, 297, 355, 257, 96, 1935, 1935, 518, 441, 190, 289, 1381, 1479, 1479, 563, 74, 245, 2503, 2503, 574, 286, 286, 179, 1923, 366, 211, 363, 452, 427, 621, 456, 2592, 282, 558, 665, 3575, 512, 213, 444, 737, 554, 533, 42, 317, 412, 201, 1529, 628, 127, 541, 632, 1938, 196, 39, 39, 399, 489, 192, 1405, 3108, 824, 656, 550, 1460, 261, 16, 514, 188, 476, 3406, 425, 704, 10, 155, 316, 224, 389, 118, 590, 163, 623, 337, 193, 193, 193, 518, 96, 441, 260, 99, 205, 55, 55, 342, 2712, 5, 225, 140, 408, 256, 232, 122, 1214, 2057, 2503, 507, 539, 514, 476, 552, 610, 659, 1089, 226, 287, 436, 185, 624, 628, 3966, 382, 544, 123, 464, 245, 101, 358, 42, 132, 706, 287, 335, 740, 539, 4030, 419, 412, 231, 518, 292, 22, 107, 1037, 1037, 19, 192, 613, 3108, 467, 1073, 3276, 266, 579, 579, 579, 65, 54, 54, 1073, 344, 2844, 2844, 57, 593, 1037, 180, 334, 658, 40, 1089, 211, 823, 688, 237, 632, 710, 636, 284, 530, 1405, 467, 3108, 656, 550, 333, 568, 261, 3480, 188, 476, 298, 511, 704, 10, 3836, 46, 268, 554, 554, 48, 174, 576, 560, 415, 163, 372, 550, 444, 444, 285, 619, 2712, 733, 5, 448, 484, 406, 551, 55, 376, 253, 633, 5, 214, 408, 653, 107, 38, 122, 682, 394, 668, 312, 550, 444, 737, 737, 258, 193, 193, 231, 231, 518, 570, 457, 154, 130, 396, 425, 591, 557, 468, 1519, 39, 167, 245, 92, 415, 1479, 1381, 2185, 260, 1, 1, 2031, 196, 2057, 309, 823, 363, 210, 722, 709, 42, 42, 706, 371, 335, 198, 644, 644, 539, 2579, 210, 2712, 1089, 584, 688, 628, 135, 9, 231, 231, 357, 391, 473, 280, 117, 263, 101, 190, 344, 1593, 385, 612, 1397, 187, 206, 304, 641, 205, 42, 584, 12, 597, 216, 214, 407, 244, 56, 56, 56, 323, 445, 323, 323, 378, 594, 456, 1228, 2057, 492, 430, 377, 646, 230, 440, 440, 440, 440, 440, 440, 440, 491, 1651, 545, 658, 40, 1091, 733, 733, 5, 214, 1923, 38, 493, 374, 2503, 539, 539, 2579, 427, 450, 124, 211, 391, 287, 321, 354, 624, 256, 518, 392, 2280, 329, 232, 493, 340, 469, 475, 475, 551, 247, 104, 484, 544, 2992, 565, 515, 515, 506, 427, 79, 1206, 409, 409, 409, 409, 409, 409, 409, 682, 4013, 573, 545, 679, 440, 440, 230, 491, 491, 1651, 646, 587, 587, 545, 658, 1849, 505, 673, 297, 4030, 198, 65, 467, 467, 731, 411, 690, 187, 358, 823, 391, 557, 47, 629, 499, 240, 719, 4, 194, 719, 719, 315, 903, 3254, 727, 565, 287, 1206, 1206, 344, 1593, 1593, 569, 711, 292, 719, 522, 193, 193, 231, 346, 481, 85, 204, 70, 99, 99, 107, 38, 1214, 238, 645, 645, 574, 2514, 253, 55, 558, 193, 322, 694, 518, 570, 72, 223, 4022, 1460, 2644, 601, 228, 2503, 218, 2031, 196, 86, 479, 374, 309, 220, 12, 597, 290, 214, 415, 157, 199, 199, 2723, 468, 1700, 353, 388, 633, 126, 214, 921, 545, 545, 545], "source_text": ": Can mindful walking be used as a form of meditation or spiritual practice?", "source_wav": "/home/v-wenxichen/data/debug/slam-omni_jsonl-en/audio/audio_6.wav", "target_text": "Yes, mindful walking can be used as a form of meditation or spiritual practice. It involves bringing awareness and attention to each step, focusing on the physical sensations in the body, and being fully present in the moment. Many spiritual traditions, including Buddhism and Taoism, incorporate walking meditation as a way to cultivate mindfulness and connect with the present moment. Mindful walking can also be a way to practice gratitude, connect with nature, and reduce stress and anxiety."} +{"split_name": "identity", "index": 12, "round": 1, "target_token": [745, 545, 1651, 668, 620, 568, 261, 99, 647, 647, 475, 585, 198, 467, 54, 591, 391, 455, 4, 357, 3006, 12, 518, 711, 170, 186, 285, 285, 285, 285, 411, 411, 198, 2330, 79, 422, 157, 157, 468, 686, 399, 399, 204, 184, 228, 355, 441, 346, 1935, 231, 292, 22, 107, 38, 253, 180, 204, 630, 630, 755, 283, 348, 33, 551, 2989, 188, 3480, 202, 3539, 3480, 507, 647, 107, 623, 132, 310, 528, 462, 355, 99, 2844, 2844, 577, 55, 253, 3132, 274, 563, 74, 79, 175, 28, 727, 183, 515, 287, 130, 3108, 3108, 656, 550, 333, 333, 333, 333, 333, 2644, 223, 130, 101, 2185, 550, 590, 163, 163, 271, 271, 271, 535, 672, 224, 544, 443, 2444, 440, 440, 440, 440, 491, 440, 440, 440, 463, 463, 463, 463, 671, 1019, 710, 1849, 675, 426, 2644, 177, 1923, 2057, 309, 363, 313, 557, 1354, 169, 308, 493, 38, 585, 261, 568, 3539, 385, 275, 482, 317, 292, 488, 488, 141, 11, 612, 601, 601, 569, 619, 123, 382, 392, 490, 88, 283, 468, 1519, 605, 86, 94, 130, 396, 42, 706, 628, 1531, 72, 528, 554, 164, 502, 554, 22, 1923, 361, 317, 256, 400, 468, 1519, 160, 1942, 2592, 39, 363, 383, 210, 427, 89, 175, 187, 823, 391, 594, 89, 716, 42, 584, 183, 383, 335, 335, 721, 2579, 411, 411, 65, 54, 467, 740, 2579, 219, 340, 493, 122, 14, 96, 107, 441, 9, 518, 231, 3006, 1852, 231, 292, 72, 223, 1593, 101, 119, 650, 565, 450, 256, 190, 286, 95, 2712, 124, 211, 1531, 492, 473, 130, 467, 1073, 2844, 99, 16, 16, 134, 134, 3290, 745, 187, 190, 2330, 409, 409, 409, 409, 409, 409, 609, 409, 195, 435, 1849, 139, 384, 129, 129, 80, 80, 411, 1206, 650, 124, 733, 5, 214, 197, 540, 65, 467, 119, 706, 210, 210, 214, 427, 79, 79, 1206, 2330, 198, 734, 419, 503, 127, 419, 419, 238, 518, 4, 719, 231, 3254, 2992, 12, 290, 141, 141, 297, 177, 90, 2712, 558, 263, 231, 357, 391, 226, 1091, 309, 706, 741, 2185, 92, 623, 42, 144, 2330, 1089, 823, 363, 210, 427, 151, 609, 609, 609, 394, 223, 1593, 1593, 1593, 72, 346, 3094, 96, 479, 167, 79, 110, 409, 409, 498, 445, 445, 445, 527, 159, 455, 2891, 23, 522, 522, 522, 357, 299, 464, 377, 545, 545, 463, 440, 440, 646, 440, 440, 440, 440, 440, 230, 440, 230, 440, 440, 230, 230, 646, 463, 545, 545, 1091, 1091, 358, 704, 371, 3575, 623, 548, 439, 283, 1519, 1519, 247, 240, 240, 193, 1935, 823, 226, 42, 309, 706, 287, 335, 50, 50, 411, 532, 3836, 511, 711, 444, 444, 333, 3539, 1593, 539, 676, 1923, 187, 358, 77, 467, 467, 690, 348, 472, 551, 1354, 30, 563, 3094, 423, 94, 86, 340, 2712, 124, 725, 292, 9, 441, 9, 518, 231, 357, 823, 450, 1091, 425, 226, 628, 441, 340, 4, 522, 193, 231, 292, 238, 1923, 540, 651, 640, 541, 11, 612, 514, 95, 2592, 558, 114, 194, 522, 231, 518, 292, 603, 601, 569, 1206, 446, 450, 565, 618, 238, 206, 3055, 95, 2712, 124, 189, 286, 226, 720, 467, 65, 401, 507, 99, 99, 16, 3290, 745, 187, 190, 2330, 409, 362, 362, 1354, 370, 269, 269, 27, 468, 2514, 629, 483, 30, 268, 48, 737, 2299, 285, 198, 80, 354, 532, 576, 362, 409, 1206, 101, 704, 155, 155, 143, 371, 709, 416, 409, 409, 606, 606, 606, 577, 577, 388, 388, 253, 253, 376, 214, 189, 823, 363, 383, 210, 427, 244, 56, 56, 56, 445, 323, 498, 277, 295, 548, 265, 116, 1065, 2444, 230, 230, 440, 230, 230, 440, 440, 440, 440, 440, 440, 646, 545, 377, 299, 522, 231, 4, 292, 238, 674, 439, 123, 337, 96, 72, 228, 187, 1397, 170, 1460, 3539, 1593, 275, 2989, 95, 2592, 5, 448, 214, 427, 11, 3406, 536, 309, 706, 335, 335, 50, 50, 411, 532, 3836, 511, 711, 444, 333, 186, 3539, 1593, 539, 218, 1531, 1942, 366, 153, 699, 225, 310, 1519, 2514, 175, 3950, 548, 563, 563, 74, 259, 256, 292, 392, 299, 345, 238, 1923, 196, 556, 396, 309, 706, 734, 228, 509, 297, 745, 187, 190, 2330, 409, 498, 576, 576, 362, 362, 1206, 621, 716, 386, 522, 357, 823, 596, 1091, 132, 213, 4022, 1460, 333, 312, 568, 3276, 3276, 3276, 3276, 364, 9, 180, 19, 334, 342, 1261, 382, 3006, 3006, 522, 231, 522, 522, 522, 522, 357, 299, 299, 671, 545, 545, 545, 545, 658, 692, 105], "source_text": ": How does this textbook compare to other textbooks on the same subject matter?", "source_wav": "/home/v-wenxichen/data/debug/slam-omni_jsonl-en/audio/audio_7.wav", "target_text": "I'm sorry, as an AI language model, I do not have the ability to determine the subject matter or compare this textbook to others. Please provide me with the specific subject matter you are referring to so that I can provide you with a better response."} +{"split_name": "identity", "index": 17, "round": 1, "target_token": [745, 545, 658, 70, 401, 539, 539, 338, 72, 441, 600, 694, 694, 412, 102, 1923, 340, 232, 1037, 204, 83, 630, 755, 469, 283, 551, 551, 104, 188, 275, 385, 3539, 3539, 62, 507, 755, 374, 14, 468, 528, 462, 355, 4085, 99, 2752, 422, 613, 253, 389, 274, 563, 74, 79, 175, 1942, 1065, 153, 183, 506, 287, 396, 824, 467, 181, 550, 333, 333, 186, 186, 739, 223, 2386, 556, 2185, 590, 590, 163, 163, 590, 195, 195, 1423, 426, 1593, 297, 86, 1531, 187, 375, 823, 391, 422, 175, 250, 100, 493, 1214, 238, 4085, 3539, 3945, 3945, 575, 275, 425, 168, 141, 488, 141, 11, 3480, 601, 539, 284, 740, 704, 3836, 124, 1091, 132, 706, 371, 511, 411, 409, 198, 101, 383, 2891, 23, 4, 518, 292, 2297, 824, 1214, 738, 579, 390, 535, 649, 195, 394, 462, 245, 119, 42, 132, 335, 210, 214, 308, 283, 33, 3406, 3406, 3406, 27, 33, 1519, 1923, 107, 3827, 376, 122, 122, 633, 600, 694, 231, 464, 441, 424, 1460, 620, 355, 1214, 1037, 204, 70, 70, 3290, 16, 601, 603, 433, 337, 392, 544, 607, 464, 340, 479, 479, 273, 90, 744, 744, 85, 710, 373, 558, 558, 46, 46, 522, 522, 522, 522, 522, 522, 3006, 357, 1479, 1479, 658, 658, 463, 440, 646, 491, 637, 230, 230, 230, 230, 491, 491, 491, 491, 440, 646, 1651, 491, 545, 658, 658, 373, 75, 75, 488, 562, 601, 1593, 118, 1381, 1479, 48, 2299, 612, 612, 50, 411, 98, 3836, 706, 411, 50, 411, 165, 536, 560, 165, 173, 1354, 2514, 1519, 196, 196, 218, 177, 539, 1593, 1593, 471, 118, 163, 163, 118, 72, 72, 596, 168, 1531, 1519, 468, 172, 456, 467, 3108, 65, 190, 2752, 138, 388, 388, 388, 253, 87, 298, 476, 506, 515, 3965, 183, 3766, 452, 632, 206, 1938, 734, 734, 245, 245, 421, 608, 608, 439, 346, 357, 3006, 231, 292, 141, 433, 645, 228, 734, 378, 119, 425, 706, 537, 86, 43, 253, 376, 650, 282, 2330, 143, 287, 335, 50, 281, 187, 100, 674, 69, 292, 4, 3006, 3006, 231, 518, 570, 72, 223, 1460, 4022, 4022, 4022, 644, 297, 1, 479, 86, 292, 231, 903, 450, 506, 565, 10, 143, 371, 511, 80, 409, 489, 489, 489, 79, 79, 733, 558, 107, 522, 357, 504, 226, 42, 704, 533, 3132, 163, 889, 2437, 206, 1923, 2057, 596, 340, 179, 2514, 310, 253, 376, 224, 42, 706, 511, 511, 198, 1206, 456, 189, 823, 618, 141, 256, 240, 240, 518, 193, 193, 4, 292, 238, 456, 42, 1091, 317, 36, 141, 197, 2844, 2844, 254, 364, 1037, 122, 1037, 180, 192, 1261, 123, 3006, 3006, 522, 1852, 3006, 3006, 522, 357, 623, 1479, 631, 2444, 279, 230, 230, 230, 230, 230, 230, 646, 327, 545, 710, 702, 490, 522, 193, 231, 292, 238, 114, 684, 253, 687, 322, 322, 694, 518, 238, 1923, 585, 3605, 247, 104, 699, 699, 515, 383, 452, 238, 240, 235, 38, 69, 892, 462, 336, 548, 163, 889, 4022, 1460, 1593, 297, 2503, 86, 168, 1935, 231, 518, 464, 351, 2752, 55, 253, 633, 119, 425, 706, 371, 117, 709, 409, 198, 740, 725, 699, 23, 194, 597, 292, 441, 361, 232, 432, 462, 579, 390, 271, 649, 481, 481, 399, 86, 281, 281, 188, 699, 699, 699, 699, 699, 383, 427, 557, 422, 445, 56, 56, 159, 159, 445, 159, 159, 250, 159, 2297, 429, 357, 522, 522, 1852, 357, 490, 377, 631, 279, 230, 230, 230, 230, 230, 230, 230, 230, 230, 230, 230, 140, 334, 710, 442, 598, 519, 2782, 308, 298, 87, 596, 733, 506, 287, 212, 316, 371, 165, 362, 1354, 1354, 3466, 3466, 3466, 3605, 179, 2057, 187, 1923, 1519, 551, 310, 87, 253, 54, 467, 731, 574, 256, 518, 231, 522, 600, 2280, 3836, 27, 548, 623, 419, 629, 1519, 1531, 596, 168, 88, 283, 551, 310, 253, 253, 3108, 232, 196, 3350, 2752, 401, 2579, 606, 606, 80, 80, 411, 1206, 190, 456, 596, 2057, 1938, 167, 86, 135, 479, 302, 378, 498, 503, 1942, 903, 23, 3006, 357, 12, 363, 212, 287, 155, 371, 720, 198, 728, 346, 119, 3966, 419, 69, 518, 193, 193, 193, 231, 518, 292, 479, 479, 479, 39, 2514, 702, 124, 733, 5, 95, 196, 378, 116, 3836, 706, 326, 401, 2844, 57, 55, 376, 584, 618, 506, 591, 1206, 594, 596, 211, 363, 452, 141, 427, 2752, 2752, 388, 388, 647, 585, 467, 3108, 3108, 3108, 709, 401, 2844, 99, 57, 9, 180, 19, 710, 342, 342, 1091, 584, 12, 688, 299, 237, 637, 373, 530, 545, 658, 658, 658, 1651, 279, 230, 230, 230, 440, 582, 440, 440, 440, 440, 440, 463, 545, 545, 545, 583, 431, 546, 728, 198, 358, 42, 317, 706, 371, 511, 285, 539, 612, 11, 67, 699, 183, 565, 226, 50, 50, 50, 476, 287, 211, 823, 618, 506, 427, 50, 581, 42, 42, 704, 706, 411, 411, 371, 155, 155, 10, 3836, 132, 389, 571, 1596, 181, 285, 80, 411, 80, 32, 613, 116, 3836, 155, 371, 511, 198, 728, 441, 123, 3966, 1938, 3055, 450, 2592, 733, 5, 214, 214, 408, 197, 507, 16, 16, 606, 285, 467, 54, 65, 80, 80, 409, 2330, 537, 326, 441, 439, 346, 357, 522, 194, 504, 28, 584, 5, 214, 141, 197, 257, 1214, 180, 394, 668, 312, 181, 444, 444, 737, 737, 171, 412, 231, 193, 231, 518, 570, 674, 892, 528, 623, 3132, 3094, 3966, 419, 400, 157, 172, 605, 605, 1531, 187, 375, 584, 363, 383, 210, 427, 594, 609, 609, 195, 394, 426, 338, 4030, 1397, 493, 38, 374, 204, 70, 297, 2645, 257, 433, 392, 131, 292, 257, 728, 556, 1065, 282, 263, 274, 590, 889, 1381, 130, 358, 706, 594, 400, 160, 214, 214, 408, 427, 574, 196, 475, 475, 3605, 281, 173, 185, 98, 287, 473, 422, 79, 79, 79, 1206, 409, 409, 409, 409, 409, 409, 682, 287, 573, 443, 679, 230, 230, 230, 440, 440, 440, 230, 230, 230, 440, 463, 545, 545, 545, 702, 357, 3006, 231, 518, 570, 569, 612, 540, 127, 1065, 183, 183, 452, 11, 674, 69, 22, 231, 694, 704, 132, 1104, 250, 623, 889, 333, 181, 333, 333, 568, 2644, 4085, 20, 107, 469, 469, 469, 472, 475, 310, 253, 420, 1104, 180, 195, 181, 80, 80, 80, 80, 467, 3108, 467, 1206, 585, 232, 824, 38, 86, 469, 574, 591, 42, 42, 706, 3052, 400, 419, 528, 548, 548, 528, 441, 86, 196, 196, 2057, 187, 89, 400, 3350, 253, 253, 390, 215, 673, 426, 426, 2644, 2644, 462, 2437, 503, 116, 441, 1938, 326, 79, 1206, 409, 409, 110, 110, 110, 445, 722, 159, 269, 23, 357, 522, 299, 357, 357, 357, 168, 1479, 677, 221, 279, 230, 230, 230, 230, 745, 1479, 1651, 1651, 658, 583, 686, 468, 348, 468, 33, 247, 247, 247, 1519, 283, 1531, 135, 608, 528, 548, 548, 548, 548, 548, 277, 384, 609, 85, 106, 658, 658, 545, 221, 3094, 695, 695, 596, 3966, 441, 260, 223, 590, 548, 172, 623, 590, 15, 15, 3823, 152, 385, 514, 1053, 2712, 2592, 733, 11, 612, 285, 1073, 155, 704, 42, 1091, 425, 706, 371, 511, 511, 186, 186, 186, 1593, 539, 2503, 259, 498, 533, 34, 540, 102, 518, 231, 193, 522, 193, 231, 292, 240, 629, 172, 1307, 1519, 686, 83, 399, 184, 2503, 519, 467, 253, 125, 2712, 733, 302, 36, 512, 371, 511, 3539, 620, 3539, 352, 613, 337, 231, 1935, 710, 382, 903, 292, 238, 69, 1037, 180, 19, 346, 1852, 194, 688, 212, 287, 3821, 117, 117, 591, 173, 50, 47, 173, 473, 399, 707, 626, 493, 493, 122, 38, 755, 138, 157, 157, 388, 388, 353, 2712, 124, 5, 3350, 214, 408, 519, 374, 493, 626, 292, 408, 408, 653, 507, 3290, 344, 498, 119, 2280, 533, 342, 122, 38, 479, 57, 228, 218, 3605, 370, 2712, 95, 475, 1923, 187, 2057, 259, 540, 624, 321, 98, 81, 489, 83, 630, 755, 755, 2579, 65, 54, 467, 98, 132, 335, 408, 141, 562, 3290, 713, 713, 713, 644, 539, 401, 2712, 2592, 558, 665, 46, 522, 1852, 1935, 96, 609, 85, 426, 568, 3539, 620, 526, 3108, 3108, 243, 355, 516, 467, 3108, 3108, 65, 453, 509, 4085, 603, 255, 427, 124, 823, 363, 570, 258, 554, 554, 164, 3575, 71, 75, 75, 141, 11, 645, 3290, 2644, 223, 590, 889, 889, 889, 149, 337, 10, 10, 3836, 503, 394, 481, 435, 204, 70, 519, 99, 516, 446, 414, 633, 824, 633, 1381, 92, 260, 355, 223, 471, 118, 118, 130, 396, 42, 706, 594, 157, 468, 468, 472, 472, 469, 469, 469, 469, 388, 310, 253, 684, 593, 204, 673, 426, 569, 284, 126, 704, 42, 1091, 425, 706, 371, 511, 198, 198, 594, 28, 584, 363, 210, 427, 206, 256, 518, 231, 193, 522, 292, 628, 101, 119, 124, 317, 36, 427, 519, 1938, 479, 57, 9, 1037, 1037, 180, 192, 377, 710, 545, 600, 322, 694, 193, 522, 522, 3006, 522, 522, 357, 1479, 710, 221, 279, 230, 230, 440, 440, 230, 230, 230, 491, 230, 230, 230, 230, 230, 491, 1651, 1651, 545, 545, 545, 658, 710, 293, 83, 605, 196, 1923, 187, 2057, 585, 1923, 88, 231, 694, 231, 292, 340, 540, 65, 54, 14, 42, 706, 371, 371, 77, 655, 737, 285, 80, 409, 501, 489, 456, 270, 298, 3406, 3406, 482, 212, 155, 10, 155, 704, 511, 285, 411, 80, 1073, 42, 42, 706, 371, 371, 721, 173, 456, 1228, 226, 594, 323, 343, 343, 540, 102, 455, 194, 1852, 12, 383, 591, 50, 411, 362, 173, 173, 151, 455, 23, 357, 481, 481, 184, 228, 676, 1938, 187, 493, 122, 54, 1214, 1923, 219, 275, 177, 2330, 1423, 195, 523, 1405, 467, 129, 129, 409, 409, 409, 411, 34, 2592, 733, 287, 455, 194, 194, 688, 1065, 2992, 183, 506, 427, 690, 731, 130, 42, 425, 706, 511, 531, 512, 186, 186, 539, 401, 574, 2579, 2579, 198, 411, 50, 456, 42, 584, 363, 141, 433, 512, 3539, 444, 4022, 739, 739, 539, 177, 2503, 1938, 196, 419, 361, 4, 231, 193, 356, 382, 3254, 131, 346, 441, 9, 518, 231, 522, 193, 231, 518, 238, 479, 2031, 179, 3605, 400, 18, 155, 131, 506, 473, 189, 1531, 290, 174, 1307, 104, 1519, 686, 83, 399, 630, 1923, 238, 493, 824, 310, 375, 220, 168, 256, 196, 475, 2514, 596, 702, 55, 1214, 574, 2752, 684, 716, 42, 124, 823, 597, 290, 427, 1531, 157, 2723, 2723, 157, 468, 39, 399, 399, 489, 184, 260, 2437, 289, 498, 3094, 123, 382, 3966, 441, 260, 734, 326, 421, 154, 624, 439, 346, 231, 357, 439, 481, 481, 107, 340, 493, 122, 709, 446, 98, 354, 129, 4022, 1460, 4022, 739, 739, 739, 550, 550, 889, 889, 118, 623, 317, 366, 168, 135, 481, 399, 399, 2057, 38, 253, 613, 32, 354, 362, 79, 422, 244, 2752, 2844, 3290, 1235, 3823, 620, 312, 535, 535, 314, 59, 441, 361, 124, 584, 12, 452, 345, 205, 25, 39, 2723, 2723, 199, 353, 310, 43, 417, 3094, 583, 583, 221, 679, 230, 230, 440, 230, 230, 230, 440, 230, 440, 440, 440, 440, 440, 463, 545, 545, 450, 1091, 1091, 584, 363, 345, 488, 562, 539, 507, 514, 3605, 104, 3406, 3406, 226, 506, 515, 3965, 3965, 183, 383, 546, 540, 1938, 734, 245, 424, 424, 245, 245, 326, 378, 154, 378, 378, 3950, 3950, 250, 159, 429, 299, 522, 522, 522, 357, 357, 127, 1479, 710, 221, 279, 679, 230, 230, 508, 491, 636], "source_text": ": What do you think about the ethics of Reality TV shows?", "source_wav": "/home/v-wenxichen/data/debug/slam-omni_jsonl-en/audio/audio_8.wav", "target_text": "As an AI language model, I do not have personal opinions on ethics. However, reality TV shows have been criticized for exploiting participants, sensationalizing personal issues, and creating misleading narratives for the sake of entertainment. The pressure to perform for the cameras can also lead to unethical behavior, such as lying or manipulating others. Additionally, the lack of privacy and constant scrutiny can have negative impacts on the mental health and well-being of participants. It is important for producers and networks to prioritize the safety and dignity of those involved in reality TV shows."} +{"split_name": "identity", "index": 22, "round": 1, "target_token": [284, 530, 545, 203, 172, 1519, 196, 228, 2645, 603, 603, 570, 627, 193, 193, 193, 193, 193, 193, 322, 193, 292, 238, 603, 601, 601, 601, 482, 704, 711, 170, 486, 164, 164, 889, 170, 426, 228, 630, 399, 137, 750, 124, 823, 597, 141, 335, 645, 3055, 95, 42, 482, 342, 55, 232, 256, 16, 261, 568, 333, 1596, 1596, 150, 258, 419, 380, 377, 153, 183, 306, 513, 157, 157, 2723, 214, 2592, 733, 5, 214, 408, 197, 99, 2503, 362, 467, 396, 425, 591, 557, 468, 468, 27, 269, 269, 468, 242, 175, 343, 513, 483, 483, 201, 518, 597, 194, 2302, 3254, 363, 618, 210, 351, 407, 450, 2592, 565, 506, 212, 733, 335, 257, 401, 644, 644, 539, 401, 18, 2712, 282, 274, 149, 3132, 415, 51, 181, 186, 1593, 612, 612, 603, 258, 581, 309, 3966, 1988, 528, 471, 223, 297, 479, 407, 430, 704, 650, 1091, 358, 67, 512, 512, 4022, 1460, 1460, 424, 482, 42, 425, 706, 342, 2723, 135, 608, 548, 528, 528, 260, 479, 196, 179, 1519, 43, 383, 3965, 3965, 183, 383, 141, 256, 107, 364, 1037, 122, 122, 122, 122, 122, 84, 623, 3006, 522, 193, 357, 490, 481, 85, 85, 464, 292, 231, 193, 193, 694, 518, 238, 228, 601, 612, 603, 67, 42, 823, 688, 433, 434, 258, 554, 164, 163, 590, 223, 569, 297, 218, 196, 1531, 702, 1065, 3006, 694, 193, 357, 340, 83, 686, 39, 2514, 366, 124, 282, 263, 274, 371, 511, 387, 259, 540, 359, 42, 211, 602, 482, 212, 287, 117, 576, 354, 89, 256, 518, 392, 168, 210, 117, 511, 498, 1206, 409, 80, 3, 824, 3108, 467, 2185, 158, 2437, 732, 732, 457, 609, 85, 520, 123, 3006, 193, 193, 518, 292, 11, 603, 4085, 22, 253, 14, 382, 322, 694, 518, 292, 205, 283, 310, 523, 55, 271, 2344, 630, 755, 240, 3406, 3406, 554, 1852, 153, 727, 565, 506, 287, 721, 1206, 467, 3108, 198, 401, 2844, 593, 593, 1037, 122, 1037, 28, 2280, 3006, 1935, 231, 504, 665, 2592, 733, 5, 214, 197, 99, 43, 55, 1479, 506, 506, 212, 335, 740, 190, 644, 644, 539, 401, 18, 544, 658, 669, 153, 183, 515, 210, 427, 574, 3055, 388, 551, 104, 87, 235, 153, 3965, 345, 205, 419, 135, 346, 357, 522, 357, 490, 2297, 593, 122, 682, 446, 119, 2386, 326, 453, 260, 479, 137, 2992, 618, 456, 135, 1531, 596, 211, 12, 597, 141, 427, 196, 475, 3605, 247, 104, 318, 699, 699, 3965, 183, 452, 427, 256, 516, 57, 9, 682, 84, 122, 122, 84, 84, 417, 18, 280, 2530, 2444, 2444, 2444, 436, 2444, 2444, 436, 436, 121, 1035, 545, 710, 669, 3132, 415, 415, 655, 51, 181, 181, 285, 50, 482, 100, 50, 50, 411, 411, 576, 532, 354, 362, 1206, 422, 39, 137, 518, 522, 357, 194, 452, 706, 289, 129, 129, 80, 409, 409, 2386, 618, 23, 522, 522, 518, 292, 102, 135, 135, 135, 135, 135, 250, 346, 429, 357, 193, 522, 3006, 193, 518, 1479, 1065, 646, 279, 440, 230, 230, 230, 440, 230, 230, 440, 230, 440, 646, 373, 710, 334, 215, 70, 2782, 308, 493, 253, 484, 282, 558, 263, 3575, 2691, 77, 3945, 444, 3539, 601, 188, 514, 65, 298, 165, 281, 1923, 479, 481, 192, 42, 42, 67, 141, 488, 217, 645, 601, 152, 739, 739, 158, 482, 742, 1206, 1206, 409, 606, 351, 270, 253, 55, 823, 23, 3006, 193, 123, 10, 3966, 72, 228, 184, 2344, 399, 2592, 733, 5, 214, 755, 107, 253, 38, 292, 408, 408, 653, 3480, 16, 3823, 539, 289, 358, 321, 326, 2057, 232, 180, 83, 83, 160, 283, 1923, 50, 298, 3406, 42, 317, 292, 141, 141, 645, 601, 152, 539, 539, 401, 2, 650, 2592, 189, 363, 361, 481, 394, 2644, 2644, 526, 1214, 656, 633, 396, 425, 655, 51, 4022, 186, 1593, 97, 97, 258, 581, 358, 425, 3966, 118, 471, 223, 260, 2503, 2, 337, 704, 42, 42, 132, 512, 512, 4022, 1460, 1460, 284, 2386, 2592, 602, 706, 473, 370, 1531, 608, 548, 528, 441, 260, 479, 25, 39, 157, 137, 383, 3965, 3965, 183, 699, 452, 632, 593, 364, 364, 2297, 122, 122, 122, 122, 122, 417, 123, 3006, 1935, 357, 518, 481, 85, 1357, 75, 279, 230, 440, 230, 230, 230, 75, 646, 2530, 669, 377, 3928, 299, 193, 193, 193, 193, 518, 240, 1923, 676, 165, 298, 132, 258, 554, 554, 554, 554, 457, 69, 629, 1519, 1519, 172, 377, 153, 699, 313, 1519, 1519, 1519, 690, 130, 42, 706, 287, 214, 390, 196, 1938, 441, 168, 231, 231, 504, 497, 282, 558, 36, 316, 433, 512, 1593, 444, 444, 444, 889, 48, 163, 118, 2437, 1206, 1206, 362, 1206, 621, 361, 903, 194, 504, 85, 733, 452, 214, 197, 219, 574, 362, 467, 358, 706, 422, 468, 1519, 196, 259, 1938, 366, 584, 363, 452, 141, 335, 562, 539, 3823, 3823, 338, 60, 2712, 450, 565, 515, 699, 903, 3254, 2992, 618, 506, 335, 498, 489, 399, 630, 630, 2752, 99, 99, 4085, 603, 296, 38, 612, 213, 467, 54, 656, 579, 579, 390, 163, 163, 548, 528, 518, 231, 357, 3254, 2992, 618, 210, 351, 594, 492, 28, 506, 212, 287, 36, 335, 190, 158, 644, 539, 2579, 215, 2712, 497, 497, 131, 292, 256, 94, 1206, 80, 65, 3108, 467, 731, 343, 343, 133, 30, 559, 123, 10, 131, 636, 467, 467, 740, 20, 20, 102, 1037, 122, 626, 2280, 3006, 1935, 357, 518, 96, 481, 184, 57, 99, 351, 467, 130, 425, 706, 422, 1519, 160, 214, 214, 70, 196, 2989, 2989, 514, 50, 321, 704, 287, 476, 690, 1206, 409, 3, 42, 425, 706, 335, 141, 562, 15, 739, 739, 739, 739, 739, 745, 187, 742, 409, 1206, 1206, 606, 537, 20, 1037, 41, 122, 122, 122, 613, 1942, 357, 3006, 522, 357, 357, 490, 377, 463, 279, 440, 582, 582, 582, 582, 230, 230, 230, 230, 230, 230, 230, 508, 1145, 545, 583, 583, 377, 205, 1923, 281, 240, 412, 193, 193, 231, 518, 240, 676, 188, 476, 87, 14, 98, 143, 287, 721, 50, 411, 65, 467, 731, 574, 3055, 475, 475, 407, 383, 699, 183, 3766, 452, 238, 585, 310, 253, 253, 342, 584, 597, 214, 141, 107, 38, 824, 122, 1214, 96, 72, 4085, 4085, 219, 1397, 493, 54, 358, 706, 690, 1307, 1307, 175, 541, 155, 216, 639, 36, 519, 734, 228, 1938, 2057, 1228, 358, 42, 425, 67, 1938, 228, 15, 539, 539, 2579, 2, 2712, 473, 189, 584, 363, 506, 335, 351, 1206, 354, 3094, 359, 362, 1354, 3605, 281, 104, 4, 3006, 357, 363, 226, 721, 198, 409, 409, 1206, 456, 2592, 744, 565, 515, 506, 473, 722, 79, 323, 323, 1206, 362, 110, 110, 101, 492, 455, 23, 357, 346, 481, 184, 70, 519, 253, 55, 430, 211, 558, 36, 433, 512, 568, 3539, 568, 145, 342, 382, 1935, 12, 383, 427, 2579, 411, 359, 98, 591, 1206, 286, 3055, 39, 407, 618, 3965, 515, 515, 383, 702, 256, 107, 232, 122, 38, 374, 516, 22, 292, 518, 522, 522, 357, 418, 2302, 2302, 3006, 1852, 522, 194, 502, 1065, 2992, 618, 506, 427, 175, 416, 467, 54, 467, 656, 471, 333, 3539, 3539, 97, 65, 1397, 202, 114, 3827, 42, 211, 823, 618, 473, 591, 1206, 409, 409, 409, 2330, 489, 401, 401, 198, 65, 467, 119, 704, 706, 511, 1206, 456, 189, 823, 363, 141, 653, 734, 539, 539, 574, 702, 650, 124, 1089, 823, 391, 300, 399, 479, 755, 593, 122, 55, 596, 823, 256, 286, 244, 422, 388, 469, 310, 342, 618, 515, 287, 1206, 1206, 110, 306, 119, 3254, 3006, 522, 522, 357, 391, 42, 309, 226, 557, 1307, 104, 104, 403, 193, 193, 231, 4, 290, 235, 1519, 157, 157, 242, 1531, 234, 357, 231, 518, 481, 481, 184, 355, 355, 22, 55, 596, 67, 479, 273, 409, 198, 709, 335, 230, 217, 562, 62, 62, 385, 97, 581, 309, 132, 11, 72, 707, 189, 823, 168, 141, 653, 355, 509, 134, 134, 134, 1235, 426, 673, 394, 609, 443, 1849, 583, 2280, 600, 322, 193, 3006, 522, 522, 357, 357, 490, 377, 545, 279, 279, 2444, 373], "source_text": ": Can satellite technology be used to track wildlife populations?", "source_wav": "/home/v-wenxichen/data/debug/slam-omni_jsonl-en/audio/audio_9.wav", "target_text": "Yes, satellite technology can be used to track wildlife populations. Satellites equipped with remote sensing instruments can track changes in vegetation, water resources, and climate patterns that can have an impact on wildlife populations. Similarly, GPS collars can be attached to animals to track their movements and behavior patterns. This information can then be fed back to researchers and conservationists to monitor and protect endangered species and their habitats."} diff --git a/examples/s2s/demo/demo_data/jsonl_demo-zh.jsonl b/examples/s2s/demo/demo_data/jsonl_demo-zh.jsonl new file mode 100644 index 00000000..4ec9df26 --- /dev/null +++ b/examples/s2s/demo/demo_data/jsonl_demo-zh.jsonl @@ -0,0 +1,10 @@ +{"split_name": "train_3.5M_CN_ready4cosy_wo_code_switching", "index": 16012449, "round": 1, "target_token": [742, 383, 455, 73, 315, 699, 726, 427, 676, 601, 601, 202, 3480, 476, 298, 87, 212, 227, 143, 371, 371, 224, 389, 21, 21, 396, 358, 132, 64, 889, 444, 333, 512, 553, 553, 553, 217, 3823, 1235, 115, 57, 516, 513, 270, 270, 597, 194, 194, 23, 23, 2302, 153, 556, 289, 21, 311, 2185, 387, 1930, 690, 94, 624, 732, 325, 623, 6, 124, 211, 733, 617, 3055, 2031, 51, 686, 686, 686, 750, 375, 189, 220, 403, 283, 247, 551, 87, 32, 41, 41, 415, 149, 274, 64, 4022, 1593, 3290, 205, 493, 87, 240, 585, 219, 645, 102, 613, 122, 722, 321, 337, 356, 193, 193, 193, 193, 418, 2297, 159, 608, 159, 159, 346, 441, 892, 441, 439, 503, 119, 124, 544, 418, 193, 193, 239, 429, 608, 346, 159, 180, 350, 350, 123, 418, 193, 600, 559, 457, 457, 74, 414, 337, 155, 10, 10, 3836, 533, 563, 277, 101, 596, 227, 227, 706, 511, 434, 385, 3539, 568, 312, 3276, 312, 579, 672, 58, 58, 2691, 77, 889, 889, 889, 51, 51, 535, 1035, 280, 436, 222, 71, 440, 440, 71, 440, 440, 440, 71, 440, 2444, 436, 436, 1035, 280, 3821, 710, 14, 74, 245, 260, 516, 664, 446, 54, 298, 298, 309, 1397, 674, 72, 9, 9, 159, 84, 55, 596, 226, 2514, 475, 514, 507, 16, 364, 414, 1405, 1405, 409, 273, 2344, 199, 199, 160, 648, 4, 386, 2891, 2891, 2891, 153, 618, 3052, 740, 284, 150, 64, 1381, 92, 731, 456, 226, 227, 594, 1206, 351, 212, 515, 3766, 3766, 3766, 527, 594, 166, 199, 1700, 27, 388, 3466, 116, 650, 94, 557, 380, 203, 2992, 153, 565, 1354, 1354, 1354, 323, 135, 346, 503, 123, 3006, 418, 346, 608, 608, 2849, 2849, 180, 19, 106, 208, 1405, 253, 298, 389, 48, 207, 174, 325, 89, 1228, 317, 168, 1519, 157, 199, 43, 383, 515, 515, 183, 226, 621, 215, 650, 36, 141, 488, 217, 385, 507, 577, 41, 626, 558, 141, 217, 3823, 713, 2844, 408, 197, 212, 515, 3766, 465, 246, 227, 100, 50, 423, 10, 356, 3836, 132, 64, 51, 311, 1479, 28, 263, 263, 665, 653, 621, 475, 1519, 468, 157, 205, 342, 2592, 450, 227, 726, 50, 112, 732, 554, 406, 1104, 270, 493, 626, 2992, 3965, 506, 210, 427, 621, 734, 361, 346, 608, 424, 346, 3254, 2302, 194, 504, 224, 2691, 149, 389, 523, 523, 477, 55, 342, 823, 391, 690, 259, 151, 527, 133, 94, 286, 306, 2992, 153, 700, 492, 1354, 323, 3950, 608, 346, 357, 346, 302, 273, 456, 309, 227, 101, 525, 314, 535, 390, 523, 523, 672, 721, 212, 3766, 515, 726, 226, 514, 152, 539, 3480, 240, 493, 298, 745, 227, 143, 371, 77, 633, 271, 523, 3132, 274, 92, 223, 15, 115, 205, 55, 626, 137, 332, 332, 57, 102, 1037, 306, 123, 418, 193, 356, 193, 193, 46, 346, 180, 619], "source_text": " : 给定一段文本和关键词列表,删除文本中包含所有给定关键词的子字符串。\n文本:\"这是一个测试句子,目的是看看模型是否可以正确地从这个句子中删除关键词。\"\\n关键词列表:[‘测试’,‘模型’]", "source_wav": "/home/v-wenxichen/data/debug/slam-omni_jsonl/audio/audio_0.wav", "target_text": "删除包含所有给定关键词的子字符串后,文本变为:\"这是一个句子,目的是看看是否可以正确地从这个句子中删除关键词。\""} +{"split_name": "train_3.5M_CN_ready4cosy_wo_code_switching", "index": 16012449, "round": 2, "target_token": [742, 433, 553, 512, 1460, 1460, 223, 608, 35, 317, 441, 260, 260, 107, 1923, 380, 600, 597, 290, 427, 55, 84, 523, 492, 515, 473, 2579, 2828, 1460, 471, 623, 54, 581, 462, 364, 254, 332, 2344, 2344, 523, 390, 4, 3889, 290, 197, 507, 1235, 458, 458, 580, 70, 205, 662, 4, 141, 519, 1923, 44, 270, 240, 275, 97, 737, 889, 51, 51, 271, 149, 2691, 77, 1596, 1382, 728, 179, 380, 123, 234, 605, 218, 3290, 3823, 3539, 568, 568, 3945, 1596, 1596, 174, 235, 563, 92, 608, 302, 159, 446, 54, 298, 309, 67, 112, 206, 351, 456, 232, 84, 342, 245, 227, 101, 1382, 579, 1596, 406, 253, 1214, 656, 529, 1104, 22, 719, 23, 2302, 541, 464, 378, 378, 421, 1382, 732, 674, 100, 358, 189, 584, 72, 112, 737, 48, 64, 579, 672, 58, 553, 553, 512, 333, 1460, 1460, 550, 471, 223, 341, 302, 481, 300, 707, 744, 730, 658, 658, 1089, 1089, 6, 220, 4, 386, 448, 448, 484, 33, 2514, 422, 154, 423, 77, 2691, 2691, 77, 3945, 3539, 2644, 4085, 4085, 364, 276, 209, 437, 498, 378, 273, 2344, 422, 133, 321, 274, 274, 274, 2691, 2691, 389, 406, 390, 376, 523, 523, 120, 720, 263, 263, 121, 277, 277, 311, 195, 195, 394, 668, 436, 398, 331, 331, 508, 508, 508, 508, 508, 508, 508, 508, 508, 508, 508, 508, 508, 508, 508, 331, 1089, 720, 287, 227, 227, 143, 371, 3, 129, 64, 1930, 47, 187, 281, 3605, 286, 245, 570, 58, 58, 58, 672, 471, 889, 737, 118, 63, 576, 498, 154, 260, 341, 547, 556, 54, 298, 425, 67, 112, 674, 72, 257, 364, 209, 437, 498, 378, 273, 90, 2031, 2031, 2031, 25, 25, 196, 39, 101, 130, 185, 395, 331, 331, 508, 508, 508, 508, 508, 508, 508, 508, 508, 249, 331, 144, 473, 204, 584, 597, 386, 448, 290, 33, 33, 348, 348, 283, 179, 427, 358, 8, 331, 331, 508, 508, 508, 508, 508, 508, 508, 508, 508, 508, 508, 658, 1089, 208, 425, 153, 183, 363, 594, 621, 351, 212, 515, 3965, 3965, 153, 527, 407, 160, 269, 269, 157, 422, 707, 309, 558, 245, 94, 2057, 342, 6, 153, 73, 565, 565, 1354, 1354, 47, 1354, 323, 135, 159, 429, 490, 418, 418, 418, 346, 608, 341, 668, 311, 84, 824, 54, 44, 48, 207, 207, 207, 1930, 235, 1228, 168, 1519, 1519, 407, 351, 383, 699, 391, 621, 473, 733, 184, 141, 217, 385, 16, 20, 626, 552, 36, 408, 507, 562, 2844, 577, 534, 351, 212, 465, 3766, 246, 227, 100, 50, 423, 337, 3836, 533, 655, 51, 92, 277, 28, 263, 263, 570, 257, 574, 2344, 686, 199, 25, 306, 208, 584, 363, 726, 540, 50, 732, 536, 1104, 253, 538, 55, 235, 455, 3965, 506, 210, 476, 94, 206, 441, 346, 441, 424, 608, 346, 502, 688, 504, 433, 77, 149, 389, 390, 535, 477, 613, 1037, 584, 227, 492, 3466, 343, 346, 503, 154, 326, 86, 1228, 208, 2992, 23, 3965, 506, 3052, 1354, 3950, 346, 503, 357, 346, 608, 273, 456, 704, 227, 362, 354, 129, 535, 390, 390, 390, 41, 1596, 65, 143, 465, 465, 246, 726, 514, 177, 152, 539, 507, 240, 298, 476, 558, 227, 227, 371, 224, 389, 1381, 415, 3132, 377, 149, 92, 223, 16, 507, 205, 493, 626, 380, 1531, 755, 57, 102, 1037, 270, 123, 600, 356, 356, 193, 193, 418, 503, 608, 608, 346, 346, 394, 673, 85, 75, 75, 71, 440, 440, 237, 237, 237, 230, 53, 473, 3821, 720, 52, 227, 143, 371, 224, 21, 64, 591, 187, 690, 1519, 160, 1942, 2280, 4, 234, 157, 39, 90, 344, 30, 1479, 1988, 149, 1381, 63, 289, 498, 498, 2386, 2386, 354, 21, 415, 174, 77, 2691, 2691, 3575, 3575, 77, 92, 311, 590, 223, 82, 668], "source_text": " USER: 给定一段文本和关键词列表,删除文本中包含所有给定关键词的子字符串。\n文本:\"这是一个测试句子,目的是看看模型是否可以正确地从这个句子中删除关键词。\"\\n关键词列表:[‘测试’,‘模型’] ASSISTANT: 删除包含所有给定关键词的子字符串后,文本变为:\"这是一个句子,目的是看看是否可以正确地从这个句子中删除关键词。\" : 好的。现在请你将这个文本中的所有的逗号都替换成空格。", "source_wav": "/home/v-wenxichen/data/debug/slam-omni_jsonl/audio/audio_1.wav", "target_text": "好的,请稍等一下,现在我会将文本中的所有逗号替换为空格。处理后文本为:\"这是一个句子 目的是看看是否可以正确地从这个句子中删除关键词。\"。处理结果如何?"} +{"split_name": "train_3.5M_CN_ready4cosy_wo_code_switching", "index": 28189110, "round": 1, "target_token": [742, 2280, 412, 1529, 674, 612, 603, 4030, 4030, 4030, 2503, 228, 218, 734, 1938, 176, 86, 683, 585, 343, 110, 173, 44, 100, 1930, 47, 47, 47, 1354, 422, 2514, 157, 160, 25, 2031, 273, 489, 399, 215, 351, 212, 465, 3766, 36, 653, 352, 202, 946, 462, 579, 579, 3276, 3276, 3276, 3276, 3276, 3276, 3276, 3276, 2849, 276, 3950, 437, 187, 190, 2344, 388, 41, 87, 406, 33, 33, 2989, 514, 338, 568, 579, 1596, 1596, 467, 54, 470, 425, 1073, 892, 419, 892, 457, 298, 298, 425, 132, 512, 58, 58, 672, 550, 312, 364, 232, 55, 160, 388, 90, 644, 1006, 59, 150, 529, 624, 690, 47, 1354, 323, 277, 314, 77, 2691, 2691, 77, 444, 1460, 3823, 3823, 3290, 188, 197, 403, 4, 3889, 386, 318, 308, 4030, 4085, 462, 946, 3276, 3276, 579, 579, 579, 1104, 171, 381, 381, 381, 307, 460, 1401, 542, 324, 2592, 2280, 600, 292, 206, 206, 184, 399, 605, 318, 386, 447, 2891, 4, 648, 205, 400, 626, 55, 1401, 460, 1988, 67, 433, 224, 2691, 149, 535, 51, 21, 277, 96, 596, 42, 309, 67, 603, 601, 3539, 3945, 1104, 349, 432, 3950, 370, 27, 27, 27, 400, 90, 401, 426, 3276, 2849, 312, 486, 1035, 171, 607, 395, 369, 331, 730, 730, 730, 730, 730, 730, 730, 730, 730, 730, 730, 730, 730, 730, 730, 730, 369, 6, 482, 823, 153, 226, 621, 728, 167, 135, 270, 87, 406, 235, 618, 391, 94, 245, 734, 1938, 419, 513, 69, 240, 403, 3889, 220, 141, 612, 1593, 1006, 311, 609, 378, 205, 597, 719, 719, 3889, 4, 2891, 3889, 3889, 386, 380, 513, 1531, 234, 605, 605, 605, 605, 605, 605, 605, 605, 605, 605, 605, 605, 3605, 47, 165, 423, 174, 174, 48, 889, 471, 569, 612, 4030, 188, 1923, 283, 33, 33, 33, 33, 27, 348, 3350, 2031, 7, 115, 57, 57, 57, 9, 102, 102, 3950, 102, 69, 69, 483, 201, 666, 144, 222, 279, 440, 440, 440, 440, 440, 440, 440, 440, 53, 440, 53, 53, 589, 669, 631, 6, 720, 823, 363, 210, 141, 275, 177, 115, 137, 270, 460, 298, 2057, 726, 383, 141, 217, 217, 601, 341, 9, 135, 3950, 513, 173, 44, 100, 690, 47, 1354, 393, 173, 103, 52, 143, 143, 143, 143, 581, 207, 325, 1381, 101, 187, 504, 212, 745, 316, 263, 224, 672, 414, 477, 432, 537, 388, 348, 348, 310, 27, 623, 472, 283, 283, 283, 283, 469, 283, 499, 499, 629, 629, 513, 483, 270, 270, 252, 3406, 3406, 3406, 1988, 552, 369, 331, 3190, 106, 589, 473, 3052, 422, 401, 644, 2828, 312, 579, 579, 1104, 712, 434, 252, 298, 298, 1354, 1354, 1354, 445, 445, 445, 445, 445, 445, 445, 445, 79, 383, 699, 4, 3889, 4, 4, 168, 89, 1531, 605, 629, 403, 4, 3889, 3889, 4, 4, 234, 629, 179, 2057, 492, 393, 250, 100, 286, 690, 557, 323, 445, 323, 175, 527, 527, 172, 328, 629, 1519, 157, 1531, 25, 137, 750, 42, 1261, 600, 322, 356, 337, 3966, 457, 419, 513, 391, 3052, 190, 401, 115, 115, 115, 57, 102, 1037, 122, 270, 187, 412, 441, 206, 245, 2437, 223, 1006, 3, 467, 3108, 3108, 1073, 115, 332, 2344, 686, 199, 199, 348, 104, 235, 1988, 382, 600, 1935, 4, 386, 160, 199, 649, 2344, 523, 192, 1401, 701, 1059, 552, 369, 331, 730, 730, 730, 730, 730, 730, 730, 730, 730, 730, 730, 730, 331, 331, 331, 8, 182, 591, 241, 103, 74, 74, 154, 1930, 559, 412, 193, 193, 714, 96, 114, 166, 716, 212, 3766, 515, 3766, 226, 190, 218, 479, 593, 1037, 122, 493, 87, 14, 600, 714, 719, 4, 386, 386, 386, 448, 380, 25, 686, 2454, 2454, 265, 244, 89, 605, 89, 605, 175, 492, 173, 514, 539, 1593, 3945, 1596, 1596, 390, 655, 51, 655, 21, 84, 3108, 3108, 1214, 742, 302, 159, 159, 159, 1037, 306, 232, 1401, 542, 1988, 211, 220, 363, 726, 212, 210, 141, 197, 519, 273, 110, 437, 32, 32, 32, 32, 32, 3052, 160, 199, 269, 199, 120, 293, 293, 203, 921, 353, 140, 369, 730, 730, 730, 730, 730, 730, 730, 730, 730, 730, 730, 730, 730, 331, 331, 1091, 1091, 358, 389, 655, 39, 48, 737, 533, 3836, 704, 77, 97, 3823, 458, 736, 115, 238, 232, 482, 2280, 356, 356, 356, 10, 559, 441, 260, 289, 358, 375, 42, 132, 550, 458, 458, 2849, 436, 531, 279, 279, 279, 75, 75, 141, 36, 652, 395, 98, 412, 518, 238, 676, 676, 175, 127, 618, 391, 173, 540, 595, 467, 3108, 3108, 101, 90, 2344, 25, 3350, 495, 626, 116, 599, 4, 290, 39, 2344, 523, 180, 1037, 14, 466, 143, 227, 52, 498, 461, 159, 52, 3836, 227, 556, 289, 314, 271, 523, 517, 3370, 240, 597, 4, 3889, 3889, 3889, 386, 318, 179, 585, 175, 44, 460, 460, 460, 253, 3052, 3055, 388, 523, 523, 722, 41, 307, 626, 119, 482, 67, 257, 424, 608, 608, 608, 608, 245, 286, 179, 1519, 33, 27, 27, 348, 199, 199, 199, 199, 293, 293, 3370, 470, 87, 1104, 348, 138, 138, 388, 199, 523, 55, 253, 298, 87, 1988, 220, 4, 386, 290, 629, 179, 475, 388, 1405, 523, 523, 437, 534, 235, 534, 241, 359, 498, 277, 277, 74, 483, 116, 2280, 600, 356, 46, 518, 238, 256, 597, 386, 3889, 3889, 599, 290, 519, 338, 2828, 312, 312, 649, 213, 11, 240, 452, 194, 363, 452, 335, 344, 223, 59, 608, 192, 116, 96, 302, 56, 56, 422, 1354, 1354, 1354, 690, 79, 273, 302, 135, 1942, 750, 720, 6, 67, 374, 2437, 150, 314, 271, 271, 523, 319, 1104, 672, 77, 723, 369, 331, 331, 331, 331, 331, 508, 508, 730, 730, 730, 730, 730, 730, 730, 658, 658, 669, 1988, 290, 662, 4, 4, 4, 386, 386, 318, 283, 283, 348, 348, 613, 55, 55, 1104, 348, 199, 199, 2344, 90, 401, 2330, 409, 409, 110, 175, 235, 6, 1261, 1935, 4, 386, 386, 386, 290, 484, 1596, 388, 1405, 523, 523, 138, 318, 597, 4, 662, 4, 4, 386, 290, 495, 348, 138, 199, 293, 293, 293, 542, 470, 1988], "source_text": " : 写一首关于自然美景的诗歌。", "source_wav": "/home/v-wenxichen/data/debug/slam-omni_jsonl/audio/audio_2.wav", "target_text": "在绿野上,羚羊奔跑\n鸟语花香在心头荡漾\n涓涓小溪,蜿蜒、潺潺\n绿树成荫,凉雨淅淅沥沥\n自然的美景,如此神奇\n让我们沉迷,无法自拔\n在这美景之中,心灵得以宁静\n如此小小的悦动,细腻而清新"} +{"split_name": "train_3.5M_CN_ready4cosy_wo_code_switching", "index": 28189110, "round": 2, "target_token": [742, 110, 362, 74, 325, 548, 48, 118, 150, 245, 86, 232, 55, 160, 138, 2454, 244, 664, 684, 104, 406, 403, 719, 3889, 4, 386, 95, 101, 311, 314, 77, 512, 620, 2828, 2828, 3276, 217, 217, 553, 488, 488, 713, 90, 273, 130, 750, 591, 2344, 199, 388, 175, 241, 32, 185, 10, 3836, 132, 64, 655, 390, 390, 41, 41, 460, 381, 3132, 149, 274, 1381, 63, 64, 889, 471, 333, 333, 333, 333, 1596, 390, 390, 415, 271, 311, 595, 187, 211, 1938, 36, 335, 351, 3055, 629, 503, 412, 629, 259, 219, 645, 647, 88, 55, 390, 138, 138, 593, 1037, 55, 41, 626, 2992, 183, 565, 565, 506, 565, 287, 173, 1354, 1354, 323, 323, 323, 323, 323, 323, 445, 56, 151, 345, 4, 719, 193, 600, 3966, 1529, 355, 3290, 99, 647, 647, 348, 1104, 1104, 33, 283, 188, 275, 385, 62, 261, 261, 261, 3823, 3823, 3823, 458, 1235, 426, 13, 28, 75, 75, 440, 442, 442, 264, 3158, 334, 3821, 661, 299, 4, 3889, 386, 318, 676, 275, 385, 3823, 261, 620, 1460, 2185, 467, 3108, 3108, 446, 453, 341, 9, 9, 102, 364, 432, 414, 1596, 484, 2691, 2691, 389, 64, 1382, 86, 596, 220, 12, 4, 386, 386, 95, 388, 55, 32, 253, 460, 460, 1988, 317, 228, 15, 1460, 1006, 668, 668, 668, 649, 1035, 360, 680, 144, 685, 3190, 3190, 3190, 185, 362, 362, 74, 174, 174, 48, 732, 732, 206, 2057, 55, 390, 138, 469, 406, 104, 104, 719, 4, 4, 448, 95, 79, 277, 633, 77, 672, 312, 2828, 338, 4085, 476, 47, 187, 1397, 247, 551, 3052, 515, 515, 515, 227, 212, 110, 101, 277, 437, 498, 302, 341, 2849, 312, 633, 310, 87, 77, 2691, 2691, 77, 444, 97, 3539, 620, 1235, 1235, 1235, 620, 13, 2444, 144, 75, 3238, 440, 440, 264, 1357, 237, 53, 473, 631, 8, 128, 1529, 603, 97, 65, 54, 253, 889, 376, 596, 263, 558, 402, 326, 323, 557, 293, 2992, 153, 515, 506, 466, 173, 1354, 445, 73, 699, 386, 4, 4, 4, 4, 648, 172, 157, 1519, 179, 218, 228, 4030, 4030, 4085, 4085, 601, 4085, 4085, 4085, 107, 107, 69, 69, 69, 483, 483, 201, 661, 666, 222, 75, 440, 440, 440, 440, 2444, 501, 3821, 1091, 98, 706, 742, 190, 2031, 686, 686, 199, 715, 342, 3094, 1935, 12, 4, 234, 157, 348, 336, 475, 2514, 242, 684, 270, 307, 420, 1104, 536, 77, 2691, 2691, 389, 48, 731, 591, 110, 1206, 1, 340, 232, 446, 297, 115, 115, 332, 593, 1037, 55, 87, 626, 2280, 1935, 4, 648, 160, 157, 388, 32, 613, 307, 307, 41, 626, 110, 362, 325, 174, 3132, 325, 732, 540, 205, 1037, 138, 138, 283, 406, 197, 4, 386, 318, 519, 3823, 620, 312, 477, 84, 626, 211, 823, 595, 423, 325, 540, 1942, 1935, 629, 259, 275, 3823, 3823, 2644, 568, 533, 155, 704, 213, 444, 1596, 390, 41, 41, 32, 1104, 504, 194, 23, 23, 688, 556, 387, 118, 1596, 535, 390, 390, 390, 406, 406, 381, 381, 171, 77, 2712, 436, 279, 2444, 1357, 1357, 1357, 34, 631, 3821, 42, 745, 263, 665, 570, 728, 286, 39, 39, 3605, 702, 558, 263, 3575, 553, 512, 550, 1006, 311, 609, 110, 527, 187, 594, 323, 1354, 629, 203, 153, 565, 565, 506, 287, 591, 103, 527, 133, 483, 187, 2057, 3605, 247, 33, 348, 348, 1700, 417, 720, 2280, 1935, 4, 648, 157, 2752, 177, 338, 2644, 3276, 3276, 579, 579, 579, 579, 1104, 1104, 434, 511, 144, 2444, 222, 440, 1357, 53, 707, 3821, 1091, 42, 132, 67, 275, 385, 3823, 3823, 261, 3823, 3290, 256, 265, 1942, 456, 2344, 199, 269, 269, 545, 293, 686, 686, 669, 534, 117, 224, 274, 149, 389, 64, 742, 1228, 391, 594, 257, 653, 553, 62, 700, 488, 488, 713, 332, 137, 1037, 1037, 102, 57, 115, 115, 102, 1037, 270, 460, 298, 1988, 584, 72, 2645, 1460, 1006, 649, 649, 668, 649, 195, 195, 742, 2057, 344, 261, 539, 2579, 2579, 2579, 50, 112, 298, 470, 309, 132, 48, 732, 1529, 317, 292, 512, 672, 1596, 550, 312, 376, 613, 462, 603, 240, 493, 425, 1935, 597, 160, 2344, 84, 376, 41, 381, 235, 283, 2782, 3290, 2752, 577, 537, 59, 655, 824, 396, 358, 132, 423, 1930, 187, 391, 570, 3575, 77, 672, 535, 523, 376, 613, 462, 892, 419, 309, 67, 36, 316, 3575, 581, 258, 258, 439, 600, 356, 337, 570, 245, 59, 126, 58, 672, 312, 550, 2185, 467, 54, 65, 612, 2782, 2989, 475, 475, 475, 138, 2752, 456, 250, 437, 591, 2344, 199, 686, 686, 138, 83, 2592, 442, 230, 230, 440, 440, 440, 440, 440, 440, 440, 697, 162, 106, 3821, 710, 542, 310, 348, 1700, 138, 88, 20, 102, 653, 4013, 488, 553, 488, 488, 62, 713, 713, 507, 3055, 574, 321, 6, 263, 263, 263, 263, 224, 387, 245, 94, 422, 1519, 468, 269, 269, 1700, 1700, 199, 199, 686, 2454, 686, 25, 25, 605, 300, 686, 399, 300, 300, 300, 83, 83, 83, 701, 8, 666, 331, 331, 691, 691, 508, 508, 331, 1089, 1089, 189, 733, 725, 286, 1938, 89, 380, 493, 87, 648, 618, 1354, 690, 419, 503, 607, 694, 193, 559, 439, 513, 160, 199, 686, 56, 707, 702, 302, 399, 256, 597, 4, 4, 386, 290, 3605, 690, 87, 253, 307, 41, 55, 417, 618, 565, 565, 506, 473, 110, 154, 314, 274, 274, 77, 656, 92, 421, 3950, 351, 383, 3965, 3965, 565, 565, 565, 1354, 492, 1354, 1354, 172, 1519, 179, 514, 612, 1593, 1460, 4022, 550, 550, 550, 1006, 649, 1423, 117, 609, 750, 211, 2992, 2302, 478, 704, 581, 325, 1381, 92, 92, 421, 245, 608, 302, 378, 302, 302, 135, 380, 1228, 208, 339, 189, 1935, 2188, 597, 629, 283, 348, 388, 79, 32, 32, 253, 342, 132, 563, 277, 154, 527, 3950, 173, 351, 383, 699, 726, 427, 507, 115, 673, 180, 1037, 180, 1037, 2280, 193, 704, 346, 608, 394, 195, 195, 121, 1035], "source_text": " USER: 写一首关于自然美景的诗歌。 ASSISTANT: 在绿野上,羚羊奔跑\n鸟语花香在心头荡漾\n涓涓小溪,蜿蜒、潺潺\n绿树成荫,凉雨淅淅沥沥\n自然的美景,如此神奇\n让我们沉迷,无法自拔\n在这美景之中,心灵得以宁静\n如此小小的悦动,细腻而清新 : 给我推荐一些旅游景点。", "source_wav": "/home/v-wenxichen/data/debug/slam-omni_jsonl/audio/audio_3.wav", "target_text": "如果您喜欢海滨风光,我推荐您去三亚、厦门或青岛;如果您喜欢历史文化,那么可以去西安、北京或者南京;如果您想度假放松,可以考虑去丽江、大理或者海南岛。当然,不同的景点有不同的特色和魅力,您还可以根据自己的兴趣和需要做进一步的选择。"} +{"split_name": "train_3.5M_CN_ready4cosy_wo_code_switching", "index": 28189110, "round": 3, "target_token": [742, 274, 656, 1382, 266, 569, 344, 574, 3466, 3466, 47, 457, 536, 87, 602, 282, 263, 289, 2185, 51, 51, 686, 384, 608, 350, 596, 67, 276, 3950, 1405, 32, 32, 79, 199, 199, 523, 523, 1037, 122, 270, 201, 123, 356, 356, 10, 559, 439, 674, 1073, 470, 358, 309, 132, 389, 732, 100, 823, 96, 58, 58, 77, 535, 271, 523, 376, 376, 54, 44, 67, 387, 674, 50, 173, 49, 727, 227, 227, 359, 354, 295, 21, 277, 110, 110, 110, 273, 1, 90, 3466, 3466, 343, 24, 314, 1381, 396, 358, 425, 511, 325, 1397, 317, 292, 3575, 77, 633, 523, 523, 376, 3108, 296, 276, 513, 629, 1923, 597, 719, 3889, 4, 386, 203, 686, 399, 673, 512, 553, 553, 553, 553, 512, 312, 312, 82, 82, 82, 668, 398, 144, 222, 440, 440, 440, 440, 440, 440, 440, 440, 53, 249, 501, 3821, 8, 185, 103, 133, 174, 235, 174, 48, 64, 387, 585, 232, 55, 390, 138, 138, 138, 469, 88, 307, 307, 41, 88, 406, 104, 403, 719, 3889, 1852, 4, 386, 95, 422, 311, 384, 389, 77, 2691, 2691, 672, 444, 2644, 338, 16, 16, 99, 99, 4085, 69, 69, 44, 298, 1988, 482, 558, 263, 316, 316, 3575, 570, 528, 424, 341, 1423, 580, 3290, 601, 385, 275, 78, 507, 507, 2782, 2782, 2782, 228, 513, 250, 96, 3290, 713, 115, 332, 138, 199, 353, 332, 115, 1235, 364, 20, 20, 507, 3480, 202, 570, 54, 44, 69, 641, 1923, 4, 4, 290, 427, 562, 458, 1006, 1006, 150, 63, 536, 3132, 650, 282, 274, 51, 2185, 649, 195, 649, 149, 280, 436, 71, 440, 440, 1357, 440, 440, 440, 440, 440, 53, 2444, 631, 3821, 1089, 282, 558, 263, 665, 316, 570, 2437, 245, 728, 286, 1519, 33, 247, 3605, 36, 282, 558, 3575, 553, 553, 512, 1460, 1460, 1006, 609, 110, 527, 591, 323, 133, 47, 201, 650, 226, 422, 2031, 3350, 160, 1405, 523, 199, 25, 137, 1037, 1037, 122, 232, 67, 206, 892, 2437, 2437, 892, 624, 173, 391, 650, 227, 227, 706, 224, 2691, 2691, 389, 535, 271, 271, 83, 415, 415, 174, 174, 174, 325, 483, 38, 584, 153, 565, 515, 506, 3052, 690, 283, 315, 386, 197, 539, 3823, 312, 312, 2849, 121, 437, 437, 437, 79, 199, 199, 199, 199, 523, 180, 180, 437, 241, 128, 1059, 552, 124, 1091, 226, 287, 3052, 1354, 323, 110, 110, 498, 498, 354, 277, 413, 21, 21, 21, 265, 92, 605, 605, 605, 605, 605, 605, 63, 63, 63, 48, 732, 1035, 2712, 436, 2444, 222, 440, 440, 222, 222, 646, 669, 3821, 1089, 1091, 189, 823, 570, 112, 118, 732, 63, 387, 556, 358, 309, 168, 441, 2437, 590, 2185, 51, 311, 311, 311, 311, 384, 384, 384, 121, 680, 680, 291, 485, 701, 485, 696, 3190, 3190, 3190, 3190, 691, 691, 331, 1091, 1091, 1091, 425, 1073, 444, 1460, 1006, 649, 51, 750, 358, 132, 2185, 1006, 550, 471, 258, 674, 1228, 309, 67, 603, 72, 608, 608, 2849, 180, 180, 542, 93, 642, 8, 696, 3190, 3190, 691, 3190, 8, 446, 362, 416, 336, 174, 1988, 48, 325, 387, 514, 2057, 55, 593, 138, 469, 469, 406, 197, 719, 3889, 3889, 4, 448, 95, 56, 609, 384, 77, 2691, 2691, 1596, 333, 568, 261, 16, 99, 1923, 493, 55, 317, 12, 386, 225, 448, 203, 310, 199, 686, 729, 3889, 648, 25, 2344, 523, 722, 381, 298, 207, 220, 2989, 214, 141, 217, 1460, 312, 523, 523, 579, 613, 493, 558, 227, 103, 731, 2299, 444, 333, 333, 568, 606, 2579, 50, 690, 33, 203, 3052, 79, 311, 384, 609, 456, 492, 187, 100, 1307, 33, 247, 540, 358, 67, 1938, 387, 321, 42, 282, 263, 2185, 2185, 649, 384, 384, 120, 377, 211, 274, 274, 2185, 2185, 51, 649, 649, 121, 652, 436, 71, 440, 1357, 1357, 1357, 440, 53, 53, 2444, 631, 1091, 282, 263, 263, 316, 546, 728, 3055, 3605, 151, 650, 558, 665, 553, 553, 512, 1460, 1006, 263, 489, 110, 527, 100, 3950, 1354, 1354, 422, 157, 495, 313, 386, 3889, 2891, 4, 290, 308, 259, 641, 403, 719, 3889, 662, 318, 519, 713, 458, 2849, 545, 443, 609, 300, 527, 2057, 514, 275, 202, 3945, 1104, 1104, 946, 4085, 20, 537, 467, 3108, 79, 2344, 199, 523, 192, 437, 1037, 1037, 270, 559, 356, 356, 356, 123, 346, 439, 441, 302, 135, 86, 456, 358, 226, 110, 445, 422, 79, 537, 409, 498, 354, 498, 129, 129, 413, 413, 413, 413, 1381, 1381, 63, 74, 483, 1228, 309, 696, 696, 696, 696, 1089, 1091, 42, 317, 67, 275, 385, 385, 62, 261, 3823, 713, 115, 256, 513, 393, 187, 100, 3605, 33, 27, 1700, 1700, 1700, 199, 269, 269, 686, 95, 742, 284, 385, 3539, 3539, 3823, 713, 593, 1942, 694, 231, 4, 386, 386, 495, 605, 137, 596, 67, 1, 341, 2849, 477, 477, 446, 110, 362, 362, 325, 174, 366, 48, 732, 114, 2057, 55, 376, 1700, 199, 138, 593, 232, 493, 87, 596, 823, 624, 621, 94, 259, 259, 3055, 286, 154, 321, 339, 124, 1089, 263, 263, 656, 889, 51, 51, 21, 686, 384, 277, 346, 750, 596, 168, 135, 683, 892, 892, 892, 424, 424, 421, 540, 425, 309, 226, 225, 448, 203, 310, 388, 55, 122, 626, 52, 362, 1382, 63, 133, 624, 289, 311, 277, 84, 596, 742, 225, 448, 448, 484, 3350, 2752, 2844, 364, 3276, 1104, 434, 512, 58, 553, 553, 3945, 444, 333, 333, 1006, 199, 312, 312, 1035, 280, 398, 71, 75, 440, 237, 442, 140, 140, 334, 710, 545, 203, 2723, 2344, 445, 342, 733, 558, 36, 521, 728, 286, 3605, 3605, 483, 665, 3575, 553, 553, 512, 1460, 1006, 311, 609, 110, 1228, 742, 3950, 343, 1354, 133, 576, 354, 731, 514, 3055, 283, 205, 141, 488, 488, 217, 62, 385, 188, 626, 2992, 73, 183, 565, 565, 506, 1228, 445, 461, 686, 199, 43, 4, 4, 318, 605, 683, 308, 597, 4, 290, 197, 3290, 261, 1104, 406, 406, 174, 554, 258, 202, 96, 493, 96, 107, 402, 467, 3108, 1405, 79, 199, 199, 523, 523, 437, 13, 252, 87, 14, 2280, 356, 193, 559, 457, 439, 441, 135, 56, 1228, 396, 375, 226, 3052, 445, 445, 498, 498, 354, 354, 101, 101, 120, 413, 1381, 1381, 48, 732, 581, 2712, 117, 2444, 1357, 440, 1357, 71, 71, 71, 671, 710, 3821, 542, 352, 3823, 3823, 3539, 444, 1073, 470, 358, 132, 740, 103, 250, 2645, 152, 3823, 3823, 1593, 569, 674, 96, 358, 584, 391, 30, 325, 548, 1381, 605, 63, 63, 118, 118, 2437, 2437, 728, 11, 597, 2891, 3965, 565, 492, 445, 56, 137, 903, 386, 214, 408, 352, 4022, 889, 2299, 112, 240, 1261, 73, 565, 506, 506, 473, 427, 519, 115, 57, 673, 102, 180, 1037, 1401, 626, 42, 220, 72, 260, 59, 2849, 1405, 437, 982], "source_text": " USER: 写一首关于自然美景的诗歌。 ASSISTANT: 在绿野上,羚羊奔跑\n鸟语花香在心头荡漾\n涓涓小溪,蜿蜒、潺潺\n绿树成荫,凉雨淅淅沥沥\n自然的美景,如此神奇\n让我们沉迷,无法自拔\n在这美景之中,心灵得以宁静\n如此小小的悦动,细腻而清新 USER: 给我推荐一些旅游景点。 ASSISTANT: 如果您喜欢海滨风光,我推荐您去三亚、厦门或青岛;如果您喜欢历史文化,那么可以去西安、北京或者南京;如果您想度假放松,可以考虑去丽江、大理或者海南岛。当然,不同的景点有不同的特色和魅力,您还可以根据自己的兴趣和需要做进一步的选择。 : 帮我想一些关于狗的名字。", "source_wav": "/home/v-wenxichen/data/debug/slam-omni_jsonl/audio/audio_4.wav", "target_text": "关于狗的名字,不同的主人有不同的喜好。如果您喜欢可爱、懒洋洋的小狗,可以考虑给您的宠物取小名,比如“豆豆”、“宝宝”等;如果您喜欢体型强壮而有力的狗狗,可以考虑一些响亮的名字,比如“大力”、“霸气”等;如果您对狗的品种有偏好,也可以考虑为它取一些相关的名字,比如“拉布拉多”、“雪橇犬”等。"} +{"split_name": "train_3.5M_CN_ready4cosy_wo_code_switching", "index": 28189110, "round": 4, "target_token": [742, 456, 175, 166, 254, 432, 613, 253, 406, 1988, 274, 1381, 48, 174, 406, 207, 298, 87, 558, 227, 728, 504, 483, 540, 419, 270, 253, 1214, 570, 355, 349, 349, 437, 376, 376, 376, 41, 381, 185, 317, 346, 419, 419, 892, 892, 892, 276, 683, 513, 1942, 309, 1261, 3006, 12, 386, 629, 3605, 173, 52, 143, 227, 1073, 365, 414, 337, 3836, 77, 444, 3539, 3823, 62, 275, 188, 584, 363, 11, 507, 562, 2844, 138, 199, 199, 523, 523, 1479, 220, 95, 56, 498, 613, 32, 393, 235, 700, 47, 47, 1930, 674, 603, 22, 3827, 653, 713, 115, 273, 79, 422, 445, 445, 537, 421, 548, 366, 3158, 149, 274, 2691, 77, 512, 217, 3539, 3823, 713, 115, 115, 70, 70, 70, 141, 144, 222, 75, 440, 440, 1357, 1357, 71, 71, 75, 2444, 436, 602, 3821, 1091, 358, 425, 213, 4022, 4022, 889, 64, 48, 389, 552, 149, 274, 743, 743, 77, 389, 174, 1381, 64, 372, 372, 372, 372, 372, 267, 305, 305, 305, 2185, 424, 453, 402, 335, 212, 465, 3766, 3766, 3766, 391, 540, 728, 114, 38, 298, 298, 3406, 596, 100, 732, 325, 325, 74, 402, 383, 3965, 565, 506, 492, 445, 56, 56, 196, 308, 197, 386, 719, 4, 4, 4, 4, 4, 290, 160, 1700, 199, 199, 199, 199, 199, 157, 25, 138, 157, 283, 283, 235, 309, 1261, 502, 1935, 648, 1531, 1519, 247, 403, 662, 4, 386, 290, 3605, 690, 1354, 49, 515, 565, 506, 492, 445, 56, 56, 25, 390, 403, 4, 4, 4, 4, 4, 4, 448, 172, 1700, 199, 199, 25, 25, 25, 25, 25, 25, 86, 513, 380, 607, 382, 322, 10, 337, 100, 674, 540, 173, 2579, 401, 401, 401, 4030, 4030, 514, 343, 3466, 1354, 241, 722, 445, 2344, 199, 332, 115, 115, 205, 493, 626, 226, 227, 143, 371, 371, 274, 389, 336, 413, 563, 65, 241, 460, 1988, 226, 690, 2514, 157, 199, 199, 686, 25, 25, 25, 25, 25, 137, 6, 2280, 4, 318, 629, 419, 466, 455, 541, 434, 352, 4085, 99, 593, 1037, 122, 55, 626, 2280, 1935, 1852, 4, 648, 172, 157, 2514, 3052, 466, 212, 212, 465, 143, 143, 227, 704, 50, 50, 103, 173, 175, 1037, 203, 153, 565, 506, 445, 56, 378, 273, 302, 1, 479, 596, 6, 317, 374, 223, 1006, 649, 523, 312, 2849, 28, 436, 71, 71, 440, 440, 440, 1357, 440, 440, 440, 71, 71, 671, 443, 631, 1357, 23, 478, 541, 36, 3827, 167, 378, 594, 527, 49, 227, 391, 1206, 728, 112, 258, 2437, 2437, 892, 276, 22, 719, 719, 194, 640, 544, 599, 67, 112, 97, 97, 603, 3480, 99, 99, 20, 20, 1037, 1037, 122, 1214, 591, 218, 475, 1519, 33, 472, 1700, 1700, 1700, 1700, 1700, 199, 199, 686, 686, 175, 212, 212, 143, 143, 143, 227, 371, 65, 74, 30, 173, 492, 700, 175, 1354, 133, 387, 742, 1214, 100, 690, 259, 2989, 275, 601, 261, 568, 3276, 579, 579, 672, 58, 3575, 2691, 274, 21, 311, 277, 22, 504, 478, 640, 3836, 3966, 1529, 97, 202, 4085, 4085, 3276, 24, 712, 532, 10, 3836, 77, 152, 3823, 458, 458, 1235, 15, 2386, 596, 67, 223, 3, 396, 1091, 425, 132, 3132, 21, 277, 556, 596, 823, 439, 529, 97, 202, 601, 755, 493, 55, 482, 412, 4, 448, 448, 203, 310, 388, 2344, 159, 342, 6, 558, 263, 2437, 471, 4022, 550, 550, 333, 550, 889, 889, 372, 372, 1596, 3945, 77, 2712, 436, 279, 440, 440, 440, 440, 230, 53, 230, 53, 106, 3821, 81, 38, 594, 537, 209, 633, 122, 253, 482, 274, 1381, 174, 174, 207, 44, 493, 733, 227, 227, 227, 483, 540, 1529, 1214, 122, 613, 462, 3276, 59, 59, 364, 577, 388, 199, 25, 332, 159, 750, 2280, 600, 361, 516, 645, 601, 645, 645, 645, 177, 177, 1, 1, 184, 399, 184, 489, 34, 982, 42, 1091, 425, 132, 655, 563, 277, 596, 584, 556, 154, 529, 97, 202, 202, 4085, 296, 122, 122, 122, 750, 317, 260, 260, 260, 424, 424, 608, 302, 135, 1942, 375, 124, 3006, 1935, 4, 403, 247, 247, 690, 65, 32, 3108, 3108, 1405, 470, 602, 309, 132, 389, 48, 1381, 415, 415, 415, 415, 2691, 2691, 2691, 2691, 274, 92, 2437, 683, 557, 27, 490, 348, 3350, 7, 115, 115, 34, 192, 106, 192, 221, 42, 706, 3052, 1519, 3350, 2031, 7, 115, 57, 673, 180, 742, 563, 74, 1930, 281, 3605, 1519, 1531, 374, 391, 265, 250, 745, 15, 713, 713, 115, 115, 7, 7, 7, 7, 630, 140, 140, 2592, 697, 442, 230, 230, 440, 440, 230, 230, 230, 230, 473, 631, 287, 589, 3052, 351, 537, 432, 253, 626, 274, 92, 336, 406, 145, 721, 298, 1988, 227, 227, 391, 540, 206, 340, 232, 296, 355, 3276, 349, 1405, 364, 38, 55, 317, 12, 95, 2752, 3290, 3823, 568, 333, 1596, 1596, 406, 406, 77, 2691, 77, 737, 387, 343, 94, 130, 396, 425, 132, 326, 218, 475, 3055, 244, 3950, 623, 119, 149, 274, 656, 889, 550, 1460, 312, 523, 2849, 2849, 2849, 13, 127, 3836, 98, 10, 425, 112, 612, 601, 3480, 3480, 99, 31, 574, 422, 1354, 1354, 1354, 1354, 690, 690, 3466, 310, 87, 32, 370, 343, 624, 423, 655, 390, 390, 414, 3950, 175, 445, 35, 35, 445, 445, 445, 445, 445, 445, 159, 439, 490, 282, 659, 558, 263, 732, 1930, 1930, 74, 123, 116, 387, 387, 569, 190, 172, 1228, 2057, 2514, 2344, 635, 180, 635, 2454, 461, 319, 1354, 1354, 1354, 1354, 1354, 79, 378, 277, 396, 358, 425, 2185, 4022, 1460, 471, 889, 48, 174, 596, 482, 149, 2691, 149, 389, 48, 64, 372, 372, 2185, 2185, 305, 2437, 2437, 223, 453, 558, 184, 399, 686, 83, 293, 27, 33, 33, 33, 33, 283, 1923, 100, 393, 527, 2057, 3055, 177, 338, 2828, 1460, 1460, 1006, 550, 550, 550, 550, 51, 311, 289, 513, 427, 220, 12, 386, 318, 2514, 422, 1354, 381, 173, 420, 44, 241, 362, 731, 1382, 372, 372, 535, 271, 390, 523, 376, 253, 596, 226, 227, 227, 100, 540, 419, 1214, 613, 355, 3276, 349, 414, 402, 493, 342, 733, 227, 227, 391, 173, 206, 135, 503, 123, 559, 441, 223, 1460, 1006, 649, 471, 608, 596, 309, 67, 260, 394, 311, 384, 677, 121, 443, 369, 730, 730, 730, 730, 730, 730, 730, 730, 730, 730, 730, 730, 730, 331, 331, 331, 3821, 720, 473, 701, 1354, 690, 540, 299, 662, 231, 518, 22, 22, 1942, 67, 672, 312, 271, 523, 633, 32, 402, 227, 227, 227, 515, 227, 391, 173, 722, 110, 594, 401, 338, 432, 613, 41, 310, 149, 655, 655, 1596, 721, 298, 626, 227, 227, 227, 173, 540, 38, 122, 1214, 72, 2644, 3276, 312, 349, 3276, 3276, 364, 20, 400, 348, 1104, 27, 27, 1519, 39, 2031, 1, 302, 361, 375, 186, 425, 67, 2782, 275, 275, 275, 275, 4030, 439, 276, 297, 115, 115, 115, 177, 245, 250, 527, 745, 1593, 569, 2437, 2437, 223, 223, 892, 439, 260, 394, 394, 673, 671, 144, 75, 222, 440, 75, 440, 3238, 75, 237, 71, 75, 75, 75, 53, 501, 3821, 106, 1228, 1214, 591, 154, 608, 245, 516, 135, 513, 513, 483, 47, 483, 187, 2057, 179, 1519, 157, 157, 199, 199, 199, 579, 348, 1104, 199, 348, 348, 348, 348, 353, 686, 686, 686, 2302, 83, 76, 33, 348, 199, 199, 2454, 1037, 1037, 532, 356, 356, 356, 356, 356, 322, 322, 559, 439, 674, 457, 358, 317, 603, 462, 349, 633, 672, 532, 3836, 77, 444, 333, 333, 3945, 467, 54, 32, 79, 551, 647, 99, 102, 613, 22, 57, 341, 314, 655, 174, 563, 423, 24, 20, 137, 232, 1037, 626, 342, 482, 168, 448, 386, 448, 448, 203, 269, 269, 686, 686, 686, 95, 700, 3965, 3965, 565, 565, 506, 3052, 690, 514, 569, 1593, 444, 471, 471, 889, 655, 415, 271, 21, 84, 437, 3108, 709, 341, 57, 205, 232, 1988, 153, 565, 506, 473, 427, 507, 338, 3276, 13, 672, 512, 512, 3945, 579, 1104, 1104, 672, 58, 58, 512, 3539, 1460, 1006, 668, 489, 300, 242, 187, 2057, 1519, 33, 27, 2514, 591, 358, 42, 42, 706, 3052, 1307, 33, 468, 199, 686, 56, 159, 361, 688, 3836, 2386, 154, 421, 129, 52, 650, 731, 227, 371, 224, 633, 65, 1228, 211, 227, 227, 50, 112, 732, 1596, 253, 1104, 613, 270, 313, 183, 506, 473, 335, 721, 516, 361, 503, 302, 302, 215, 3158, 153, 565, 506, 445, 56, 489, 300, 346, 3254, 382, 193, 46, 639, 433, 3575, 121, 608, 195, 195], "source_text": " USER: 写一首关于自然美景的诗歌。 ASSISTANT: 在绿野上,羚羊奔跑\n鸟语花香在心头荡漾\n涓涓小溪,蜿蜒、潺潺\n绿树成荫,凉雨淅淅沥沥\n自然的美景,如此神奇\n让我们沉迷,无法自拔\n在这美景之中,心灵得以宁静\n如此小小的悦动,细腻而清新 USER: 给我推荐一些旅游景点。 ASSISTANT: 如果您喜欢海滨风光,我推荐您去三亚、厦门或青岛;如果您喜欢历史文化,那么可以去西安、北京或者南京;如果您想度假放松,可以考虑去丽江、大理或者海南岛。当然,不同的景点有不同的特色和魅力,您还可以根据自己的兴趣和需要做进一步的选择。 USER: 帮我想一些关于狗的名字。 ASSISTANT: 关于狗的名字,不同的主人有不同的喜好。如果您喜欢可爱、懒洋洋的小狗,可以考虑给您的宠物取小名,比如“豆豆”、“宝宝”等;如果您喜欢体型强壮而有力的狗狗,可以考虑一些响亮的名字,比如“大力”、“霸气”等;如果您对狗的品种有偏好,也可以考虑为它取一些相关的名字,比如“拉布拉多”、“雪橇犬”等。 : 请给我介绍一下人工智能的技术和发展。", "source_wav": "/home/v-wenxichen/data/debug/slam-omni_jsonl/audio/audio_5.wav", "target_text": "人工智能的技术发展已经越来越快,包括深度学习、机器学习、自然语言处理、计算机视觉等。随着算力、数据量和算法的不断提高,人工智能也在不断地进步和演变。未来,人工智能将会被广泛运用于各个领域,包括医疗、金融、智能制造等。与此同时,人工智能也带来了伦理、隐私等方面的问题,需要我们权衡好利弊,做出正确的决策。"} +{"split_name": "train_3.5M_CN_ready4cosy_wo_code_switching", "index": 66623028, "round": 1, "target_token": [284, 143, 227, 704, 3, 74, 277, 378, 445, 445, 445, 445, 445, 495, 4, 4, 4, 290, 308, 3055, 574, 402, 24, 24, 3276, 414, 54, 298, 309, 1073, 97, 97, 444, 471, 471, 361, 556, 72, 15, 1460, 1006, 263, 608, 250, 527, 374, 1, 2344, 353, 353, 422, 354, 358, 602, 287, 172, 269, 293, 686, 83, 345, 70, 141], "source_text": " : 根据输入的文本,判断它是否属于新闻报道、广告或博客文章类别之一。\n新闻报道:“奥运会在东京隆重开幕,中国代表团获得多项奖牌。”", "source_wav": "/home/v-wenxichen/data/debug/slam-omni_jsonl/audio/audio_6.wav", "target_text": "属于新闻报道类别。"} +{"split_name": "train_3.5M_CN_ready4cosy_wo_code_switching", "index": 66623028, "round": 2, "target_token": [352, 502, 648, 2514, 94, 581, 87, 298, 298, 1988, 274, 48, 247, 732, 2437, 2503, 38, 342, 2280, 3006, 4, 403, 499, 247, 1307, 2514, 722, 52, 212, 143, 143, 246, 246, 143, 704, 581, 732, 48, 1381, 21, 21, 686, 686, 384, 384, 609, 34, 6, 363, 3766, 456, 690, 3605, 731, 358, 706, 427, 484, 647, 2782, 601, 3945, 1596, 174, 174, 563, 289, 516, 86, 232, 122, 55, 823, 227, 391, 190, 15, 2644, 568, 579, 1596, 145, 460, 493, 282, 227, 52, 416, 64, 64, 1596, 406, 460, 298, 626, 366, 23, 688, 556, 92, 550, 271, 461, 461, 523, 192, 149, 274, 64, 48, 406, 406, 1596, 613, 493, 226, 227, 143, 371, 371, 581, 74, 343, 629, 4, 386, 197, 188, 99, 601, 24, 432, 595, 2644, 3290, 297, 3950, 242, 1228, 190, 2514, 468, 3350, 713, 713, 458, 2849, 2849, 437, 523, 2849, 84, 14, 533, 382, 356, 356, 575, 193, 193, 714, 457, 457, 457, 439, 441, 608, 608, 609, 608, 59, 672, 2691, 2691, 2691, 77, 64, 2437, 556, 396, 317, 245, 302, 424, 424, 424, 424, 2437, 223, 341, 424, 394, 260, 260, 394, 394, 673, 673, 53, 53, 369, 1089, 211, 363, 363, 175, 422, 79, 707, 34, 300, 302, 609, 750, 600, 322, 356, 356, 356, 46, 504, 570, 209, 277, 164, 2691, 2691, 2691, 2691, 149, 655, 311, 453, 184, 90, 140], "source_text": " USER: 根据输入的文本,判断它是否属于新闻报道、广告或博客文章类别之一。\n新闻报道:“奥运会在东京隆重开幕,中国代表团获得多项奖牌。” ASSISTANT: 属于新闻报道类别。 : 很好,现在请你计算一下这篇文章中出现了多少个“获得”字样。", "source_wav": "/home/v-wenxichen/data/debug/slam-omni_jsonl/audio/audio_7.wav", "target_text": "经过计数,这篇文章中总共出现了两次“获得”这个词汇。"} +{"split_name": "train_3.5M_CN_ready4cosy_wo_code_switching", "index": 12982263, "round": 1, "target_token": [570, 4, 290, 88, 283, 270, 298, 298, 406, 348, 647, 2752, 645, 509, 57, 516, 364, 276, 276, 516, 4085, 4085, 20, 69, 88, 270, 87, 298, 3406, 530, 2712, 544, 280, 1089, 189, 2280, 600, 694, 4, 403, 281, 3605, 87, 613, 307, 307, 376, 432, 376, 437, 437, 3108, 32, 1228, 2057, 3605, 247, 33, 33, 370, 445, 323, 354, 354, 532, 362, 362, 731, 94, 734, 276, 364, 209, 576, 161, 161, 498, 378, 273, 2031, 2031, 25, 25, 25, 25, 25, 25, 157, 1519, 104, 1988, 723, 600, 3889, 4, 386, 290, 427, 470, 467, 745, 152, 713, 3290, 3290, 177, 1593, 732, 48, 174, 415, 415, 21, 311, 2386, 212, 465, 3766, 504, 706, 709, 414, 568, 2644, 261, 261, 601, 742, 467, 3108, 437, 525, 90, 2344, 686, 686, 56, 445, 445, 378, 489, 184, 57, 57, 673, 671, 434, 663], "source_text": " : A公司去年亏损了500万美元,今年净利润增长了50%,今年的净利润是多少?", "source_wav": "/home/v-wenxichen/data/debug/slam-omni_jsonl/audio/audio_8.wav", "target_text": "今年净利润为750万美元"} +{"split_name": "train_3.5M_CN_ready4cosy_wo_code_switching", "index": 95901966, "round": 1, "target_token": [742, 662, 318, 281, 612, 202, 946, 579, 579, 529, 344, 574, 475, 33, 27, 268, 3605, 1930, 112, 732, 48, 48, 63, 387, 503, 596, 2280, 714, 556, 2437, 51, 51, 311, 424, 2386, 187, 608, 223, 3, 824, 3108, 65, 550, 1460, 620, 620, 1235, 3823, 152, 275, 2989, 247, 33, 27, 27, 2723, 56, 722, 466, 212, 143, 143, 143, 246, 246, 714, 371, 581, 174, 1381, 74, 527, 527, 690, 259, 4030, 338, 2644, 3276, 3276, 2828, 364, 102, 232, 626, 211, 220, 2188, 823, 308, 514, 275, 601, 4085, 4085, 4085, 107, 516, 20, 513, 1037, 1037, 1037, 293, 192, 373, 180, 192, 720, 602, 3821, 3821, 132, 226, 313, 27, 27, 468, 422, 378, 277, 576, 498, 94, 2031, 2031, 2031, 2031, 39, 605, 89, 527, 151, 247, 127, 226, 557, 179, 177, 15, 2828, 2849, 2849, 2849, 2849, 402, 270, 342, 733, 94, 3766, 3766, 391, 594, 112, 425, 54, 65, 444, 3539, 620, 1235, 1235, 713, 3290, 2782, 283, 33, 27, 1700, 199, 2454, 86, 306, 6, 226, 227, 706, 731, 150, 655, 390, 390, 406, 381, 87, 241, 483, 597, 4, 259, 290, 676, 612, 97, 568, 261, 574, 2514, 33, 3605, 690, 112, 118, 63, 439, 116, 2280, 504, 595, 2185, 51, 311, 2386, 84, 213, 3108, 3108, 65, 550, 458, 458, 3823, 713, 2782, 2989, 283, 27, 27, 157, 407, 466, 212, 143, 356, 356, 714, 371, 389, 64, 742, 527, 2057, 574, 401, 338, 2828, 2828, 547, 378, 445, 445, 323, 684, 1942, 3254, 2302, 640, 504, 387, 64, 655, 535, 271, 635, 437, 620, 3108, 65, 550, 458, 736, 3823, 3823, 152, 275, 2989, 247, 33, 27, 468, 39, 3052, 212, 143, 143, 356, 704, 371, 511, 595, 1214, 38, 514, 344, 426, 3276, 82, 432, 3108, 570, 276, 683, 513, 1228, 596, 369, 369, 331, 331, 1091, 309, 226, 313, 269, 269, 269, 686, 56, 456, 28, 650, 465, 515, 227, 515, 49, 159, 159, 461, 378, 110, 110, 378, 302, 302, 206, 11, 433, 144, 75, 75, 440, 440, 75, 440, 442, 442, 140, 334, 366, 522, 4, 4, 290, 308, 218, 4030, 107, 69, 69, 201, 518, 640, 541, 570, 112, 202, 24, 402, 722, 55, 2386, 227, 143, 371, 371, 77, 63, 233, 116, 688, 504, 581, 656, 271, 271, 715, 3108, 3108, 213, 458, 458, 1235, 3823, 507, 2782, 283, 348, 27, 199, 2344, 351, 212, 143, 143, 143, 356, 371, 371, 511, 336, 414, 159, 306, 56, 2344, 90, 115, 341, 2849, 2849, 2849, 2849, 436, 2444, 147, 730, 730, 685, 331, 1089, 1988, 226, 690, 1930, 732, 325, 74, 103, 492, 227, 391, 594, 284, 130, 3108, 213, 1460, 458, 1235, 1235, 713, 115, 7, 138, 199, 27, 1104, 1700, 269, 269, 686, 686, 95, 140, 144, 75, 75, 75, 440, 442, 53, 53, 705, 106, 1089, 339, 382, 193, 3966, 238, 676, 676, 676, 218, 734, 1938, 683, 135, 102, 518, 522, 640, 541, 570, 112, 202, 24, 537, 351, 232, 55, 2386, 227, 143, 371, 371, 224, 77, 415, 415, 563, 277, 323, 175, 383, 2891, 3889, 386, 427, 275, 202, 568, 568, 364, 537, 577, 348, 27, 203, 690, 1930, 732, 63, 63, 439, 750, 2992, 688, 595, 311, 649, 311, 608, 192, 570, 424, 467, 3108, 65, 550, 458, 620, 3823, 152, 2782, 283, 348, 199, 199, 422, 351, 212, 143, 143, 356, 356, 371, 389, 423, 209, 159, 456, 56, 90, 297, 341, 2849, 2849, 2849, 436, 1035, 369, 685, 730, 730, 730, 66, 66, 68, 696, 696, 566, 2057, 585, 177, 341, 2849, 349, 432, 402, 270, 456, 227, 3766, 49, 3950, 173, 50, 1073, 54, 656, 333, 620, 620, 1235, 1235, 115, 7, 647, 348, 348, 1700, 27, 199, 686, 742, 600, 2057, 218, 2645, 2644, 568, 579, 579, 579, 579, 946, 1104, 420, 69, 513, 4, 408, 403, 719, 2891, 2891, 4, 4, 386, 308, 676, 601, 568, 150, 579, 579, 579, 3276, 3276, 364, 537, 400, 348, 27, 203, 2514, 690, 112, 732, 48, 48, 1381, 1381, 1381, 295, 527, 1228, 720, 677, 730, 658, 658, 658, 658, 658, 658, 658, 658, 658, 658, 1089, 520, 220, 714, 504, 2437, 2185, 51, 686, 384, 384, 609, 707, 1261, 96, 276, 3, 467, 3108, 213, 1460, 458, 458, 1235, 1235, 3823, 275, 2989, 283, 348, 348, 1700, 199, 407, 351, 212, 465, 143, 143, 227, 371, 389, 1596, 731, 173, 527, 286, 3055, 177, 601, 568, 312, 523, 2849, 364, 270, 342, 725, 227, 143, 371, 371, 224, 77, 389, 336, 295, 1354, 370, 33, 33, 1519, 594, 503, 2280, 518, 688, 96, 150, 314, 271, 664, 437, 2849, 620, 65, 444, 3823, 458, 1235, 1235, 62, 385, 275, 188, 283, 33, 348, 348, 523, 166, 466, 212, 143, 143, 143, 704, 511, 1596, 709, 1214, 527, 514, 507, 338, 2828, 312, 2849, 341, 102, 270, 626, 1988, 339, 1261, 2280, 3006, 2188, 12, 903, 269, 2454, 1700, 269, 1519, 179, 629, 1228, 42, 309, 504, 374, 683, 608, 311, 424, 569, 72, 358, 584, 570, 97, 444, 444, 444, 889, 550, 2185, 284, 11, 676, 4, 4, 290, 427, 275, 202, 568, 568, 261, 401, 3466, 2514, 690, 133, 529, 423, 548, 439, 1228, 702, 688, 556, 92, 51, 649, 311, 2386, 503, 260, 1073, 3108, 3108, 213, 1460, 458, 1235, 1235, 713, 2782, 647, 348, 348, 388, 390, 351, 212, 465, 143, 143, 371, 511, 145, 402, 513, 514, 401, 338, 2828, 312, 312, 426, 102, 270, 211, 363, 315, 100, 514, 275, 3480, 99, 16, 20, 79, 467, 1405, 533, 602, 42, 591, 313, 269, 269, 269, 269, 377, 140, 140, 730, 730, 730, 730, 730, 730, 730, 730, 730, 730, 730, 730, 730, 66, 66, 730, 658, 658, 632, 530, 2712, 1935, 4, 648, 1519, 179, 419, 2362, 662, 3836, 570, 112, 202, 261, 401, 577, 684, 351, 515, 515, 565, 515, 227, 591, 742, 101, 498, 445, 378, 90, 2031, 2031, 2031, 2031, 25, 2031, 2031, 630, 214, 2712, 369, 331, 730, 508, 508, 508, 508, 508, 66, 730, 730, 730, 730, 508, 331, 331, 331, 1849, 398, 213, 444, 97, 285, 198, 198, 198, 409, 1206, 446, 28, 227, 227, 143, 371, 371, 224, 21, 3, 446, 1228, 100, 1354, 690, 1930, 387, 2185, 51, 51, 311, 311, 608, 707, 375, 6, 823, 96, 2437, 590, 277, 209, 159, 159, 445, 722, 32, 1354, 690, 3052, 232, 456, 3055, 388, 310, 87, 298, 298, 235, 317, 179, 218, 57, 57, 9, 2297, 102, 600, 356, 337, 238, 603, 355, 355, 22, 432, 307, 270, 412, 356, 10, 3966, 257, 2645, 4085, 4085, 364, 364, 364, 364, 364, 69, 524, 201, 201, 490, 299, 46, 640, 541, 359, 74, 421, 624, 94, 690, 3605, 1519, 157, 157, 199, 25, 597, 4, 3889, 386, 427, 275, 385, 3945, 568, 344, 3466, 551, 2514, 690, 624, 529, 63, 528, 1228, 2992, 688, 595, 311, 311, 346, 346, 223, 3, 3108, 65, 550, 458, 458, 3823, 507, 2782, 647, 348, 388, 388, 3052, 212, 143, 143, 143, 371, 359, 613, 173, 343, 401, 1235, 2828, 2849, 2849, 2849, 2849, 102, 306, 626, 2592, 363, 363, 238, 11, 645, 562, 31, 79, 446, 32, 185, 482, 3052, 686, 269, 686, 56, 498, 498, 378, 378, 273, 2031, 25, 686, 686, 686, 39, 190, 28, 2712, 67, 344, 115, 115, 401, 359, 533, 279, 402, 1037, 55, 626, 823, 618, 594, 206, 107, 46, 10, 337, 570, 601, 601, 507, 20, 684, 197, 188, 403, 193, 322, 337, 570, 569, 601, 99, 38, 298, 406, 160, 332, 332, 9, 102, 102, 412, 10, 356, 337, 570, 355, 15, 341, 9, 9, 673, 9, 402, 34, 369, 331, 331, 730, 66, 66, 66, 66, 66, 66, 730, 730, 508, 508, 508, 508, 658, 221, 321, 492, 700, 565, 565, 515, 226, 3052, 740, 644, 115, 736, 461, 715, 437, 32, 185, 119, 309, 706, 690, 33, 27, 269, 269, 269, 686, 83], "source_text": " : 以下是一道小学数学题: \n有一条直线,上面有 6 只蚂蚁,其中 4 只向左走,2 只向右走。问:向右走的蚂蚁数量占比是多少?\nA. 25%\nB. 33.3%\nC. 50%\nD. 66.6%\n回答该问题。", "source_wav": "/home/v-wenxichen/data/debug/slam-omni_jsonl/audio/audio_9.wav", "target_text": "向右走的蚂蚁数量占比为两只蚂蚁中向右走的蚂蚁数量与总蚂蚁数量的比值。\n先算出总蚂蚁数量:6 只蚂蚁。\n再算出向右走的蚂蚁数量:2 只蚂蚁。\n将向右走的蚂蚁数量除以总蚂蚁数量,即得到向右走的蚂蚁数量占比。\n计算式为:\n2 ÷ 6 = 0.33\n所以,向右走的蚂蚁数量占比为 33.3%,选 B。"} diff --git a/examples/s2s/demo/demo_data/jsonl_demo.jsonl b/examples/s2s/demo/demo_data/jsonl_demo.jsonl deleted file mode 100644 index 5562f43e..00000000 --- a/examples/s2s/demo/demo_data/jsonl_demo.jsonl +++ /dev/null @@ -1,6 +0,0 @@ -{"key": "1", "source_wav": "/xxx/1.wav", "source_text": "Can you recommend some Chinese food for me?", "target_wav": "/xxx/1.wav", "target_text": "Sure! I recommend trying dumplings, Peking duck, and mapo tofu for a mix of flavors and textures in Chinese cuisine. These dishes offer a good balance of savory, spicy, and crispy elements."} -{"key": "2", "source_wav": "/xxx/2.wav", "source_text": "What are some popular tourist attractions in Paris?", "target_wav": "/xxx/2.wav", "target_text": "Paris is known for its iconic landmarks such as the Eiffel Tower, Louvre Museum, and Notre-Dame Cathedral. Other popular attractions include Montmartre, Champs-Elysees, and Seine River cruises."} -{"key": "3", "source_wav": "/xxx/3.wav", "source_text": "How do I make a classic margarita cocktail?", "target_wav": "/xxx/3.wav", "target_text": "To make a classic margarita cocktail, you will need tequila, triple sec, lime juice, and ice. Combine the ingredients in a shaker with ice, shake well, and strain into a salt-rimmed glass. Garnish with a lime wedge and enjoy!"} -{"key": "4", "source_wav": "/xxx/4.wav", "source_text": "What are some popular hiking trails in the Rocky Mountains?", "target_wav": "/xxx/4.wav", "target_text": "Some popular hiking trails in the Rocky Mountains include the Continental Divide Trail, Longs Peak Trail, and Maroon Bells-Snowmass Wilderness. These trails offer stunning views of the mountains, wildlife, and natural beauty of the region."} -{"key": "5", "source_wav": "/xxx/5.wav", "source_text": "How do I make a classic margarita cocktail?", "target_wav": "/xxx/5.wav", "target_text": "To make a classic margarita cocktail, you will need tequila, triple sec, lime juice, and ice. Combine the ingredients in a shaker with ice, shake well, and strain into a salt-rimmed glass. Garnish with a lime wedge and enjoy!"} -{"key": "6", "source_wav": "/xxx/6.wav", "source_text": "What are some popular hiking trails in the Rocky Mountains?", "target_wav": "/xxx/6.wav", "target_text": "Some popular hiking trails in the Rocky Mountains include the Continental Divide Trail, Longs Peak Trail, and Maroon Bells-Snowmass Wilderness. These trails offer stunning views of the mountains, wildlife, and natural beauty of the region \ No newline at end of file diff --git a/examples/s2s/s2s_config.py b/examples/s2s/s2s_config.py index 0845cb79..eeb97ee2 100644 --- a/examples/s2s/s2s_config.py +++ b/examples/s2s/s2s_config.py @@ -189,7 +189,7 @@ class DataConfig: "help": "whether input is normalized, used for models such as wavlm" }) seed: int = 42 - manifest_format: str = field(default="datasets", metadata={ "help": "alternative: jsonl" }) + manifest_format: str = field(default="parquet", metadata={ "help": "alternative: jsonl" }) split_size: float = 0.1 vocab_config: VocabConfig = field(default_factory=VocabConfig) diff --git a/examples/s2s/scripts/finetune/finetune_s2s.sh b/examples/s2s/scripts/finetune/finetune_s2s.sh index 2ac9ae98..ce1b4594 100755 --- a/examples/s2s/scripts/finetune/finetune_s2s.sh +++ b/examples/s2s/scripts/finetune/finetune_s2s.sh @@ -32,9 +32,10 @@ num_latency_tokens=0 # number of delay tokens (in front of the ge do_layershift=false # if false, tokens in each layers use the same codebook, otherwise, use different codebooks # dataset settings +manifest_format=parquet # parquet or jsonl train_data_path=worstchan/VoiceAssistant-400K-SLAM-Omni val_data_path=worstchan/VoiceAssistant-400K-SLAM-Omni -load_from_cache_file=true # set to true if you have already generated the cache file, otherwise set to false +load_from_cache_file=true # set to true if you have already generated the cache file, otherwise set to false # training settings batch_size_training=6 @@ -89,7 +90,7 @@ hydra.run.dir=$output_dir \ ++dataset_config.input_type=mel \ ++dataset_config.mel_size=$mel_size \ ++dataset_config.seed=42 \ -++dataset_config.manifest_format=datasets \ +++dataset_config.manifest_format=$manifest_format \ ++dataset_config.split_size=$split_size \ ++dataset_config.load_from_cache_file=$load_from_cache_file \ ++dataset_config.task_type=$task_type \ diff --git a/examples/s2s/scripts/finetune/finetune_s2s_group.sh b/examples/s2s/scripts/finetune/finetune_s2s_group.sh index 01c1455e..0408fd49 100755 --- a/examples/s2s/scripts/finetune/finetune_s2s_group.sh +++ b/examples/s2s/scripts/finetune/finetune_s2s_group.sh @@ -32,9 +32,10 @@ num_latency_tokens=0 # number of delay tokens (in front of the ge do_layershift=false # if false, tokens in each layers use the same codebook, otherwise, use different codebooks # dataset settings +manifest_format=parquet # parquet or jsonl train_data_path=worstchan/VoiceAssistant-400K-SLAM-Omni val_data_path=worstchan/VoiceAssistant-400K-SLAM-Omni -load_from_cache_file=true # set to true if you have already generated the cache file, otherwise set to false +load_from_cache_file=true # set to true if you have already generated the cache file, otherwise set to false # training settings batch_size_training=6 @@ -96,7 +97,7 @@ hydra.run.dir=$output_dir \ ++dataset_config.input_type=mel \ ++dataset_config.mel_size=$mel_size \ ++dataset_config.seed=42 \ -++dataset_config.manifest_format=datasets \ +++dataset_config.manifest_format=$manifest_format \ ++dataset_config.split_size=$split_size \ ++dataset_config.load_from_cache_file=$load_from_cache_file \ ++dataset_config.task_type=$task_type \ diff --git a/examples/s2s/scripts/finetune/mini-omni/finetune_s2s.sh b/examples/s2s/scripts/finetune/mini-omni/finetune_s2s.sh index 236ce69b..97136205 100755 --- a/examples/s2s/scripts/finetune/mini-omni/finetune_s2s.sh +++ b/examples/s2s/scripts/finetune/mini-omni/finetune_s2s.sh @@ -20,6 +20,7 @@ mel_size=80 # 80 128 ( only whisper-large-v3 supports 128 ) llm_dim=896 # 896 1536 2048 3584 -> 0.5B 1.5B 3B 7B # dataset settings +manifest_format=parquet # parquet or jsonl train_data_path="/valleblob/v-wenxichen/data/s2s/VoiceAssistant-400K" val_data_path="/valleblob/v-wenxichen/data/s2s/VoiceAssistant-400K" load_from_cache_file=false # set to true if you have already generated the cache file, otherwise set to false @@ -75,7 +76,7 @@ hydra.run.dir=$output_dir \ ++dataset_config.input_type=mel \ ++dataset_config.mel_size=$mel_size \ ++dataset_config.seed=42 \ -++dataset_config.manifest_format=datasets \ +++dataset_config.manifest_format=$manifest_format \ ++dataset_config.split_size=$split_size \ ++dataset_config.load_from_cache_file=$load_from_cache_file \ ++dataset_config.task_type=$task_type \ diff --git a/examples/s2s/scripts/inference/inference_s2s_batch.sh b/examples/s2s/scripts/inference/inference_s2s_batch.sh index 76284282..bcb17392 100755 --- a/examples/s2s/scripts/inference/inference_s2s_batch.sh +++ b/examples/s2s/scripts/inference/inference_s2s_batch.sh @@ -39,7 +39,7 @@ ckpt_path=/valleblob/v-wenxichen/exp/s2s/s2s_train_v3-gpu16-btz3-lr5e-4-fp16-epo # val_data_path=/home/v-wenxichen/SLAM-LLM/examples/s2s/demo/data/${split}.jsonl # huggingface dataset -manifest_format=datasets +manifest_format=parquet val_data_path="/valleblob/v-wenxichen/data/s2s/VoiceAssistant-400K-v1/test" load_from_cache_file=false dataset_sample_seed=777 diff --git a/examples/s2s/scripts/inference/mini-omni/inference_s2s_batch.sh b/examples/s2s/scripts/inference/mini-omni/inference_s2s_batch.sh index 8a146700..4675cc77 100755 --- a/examples/s2s/scripts/inference/mini-omni/inference_s2s_batch.sh +++ b/examples/s2s/scripts/inference/mini-omni/inference_s2s_batch.sh @@ -29,7 +29,7 @@ split=test # val_data_path=/home/v-wenxichen/SLAM-LLM/examples/s2s/demo/data/${split}.jsonl # huggingface dataset -manifest_format=datasets +manifest_format=parquet val_data_path="gpt-omni/VoiceAssistant-400K" load_from_cache_file=true dataset_sample_seed=777 diff --git a/examples/s2s/scripts/inference/mini-omni/inference_s2s_stream.sh b/examples/s2s/scripts/inference/mini-omni/inference_s2s_stream.sh index a1d637b3..89152570 100755 --- a/examples/s2s/scripts/inference/mini-omni/inference_s2s_stream.sh +++ b/examples/s2s/scripts/inference/mini-omni/inference_s2s_stream.sh @@ -28,7 +28,7 @@ split=test # val_data_path=/home/v-wenxichen/SLAM-LLM/examples/s2s/demo/data/${split}.jsonl # huggingface dataset -manifest_format=datasets +manifest_format=parquet val_data_path="gpt-omni/VoiceAssistant-400K" load_from_cache_file=true dataset_sample_seed=1234 diff --git a/examples/s2s/scripts/inference/mini-omni/inference_tts.sh b/examples/s2s/scripts/inference/mini-omni/inference_tts.sh index 25971b57..b3412669 100755 --- a/examples/s2s/scripts/inference/mini-omni/inference_tts.sh +++ b/examples/s2s/scripts/inference/mini-omni/inference_tts.sh @@ -25,7 +25,7 @@ split=test # val_data_path=/home/v-wenxichen/SLAM-LLM/examples/s2s/demo/data/${split}.jsonl # huggingface dataset -manifest_format=datasets +manifest_format=parquet val_data_path="gpt-omni/VoiceAssistant-400K" load_from_cache_file=true dataset_sample_seed=1234 diff --git a/examples/s2s/scripts/pretrain/pretrain_asr.sh b/examples/s2s/scripts/pretrain/pretrain_asr.sh index 4b2ba2bc..69e23e97 100755 --- a/examples/s2s/scripts/pretrain/pretrain_asr.sh +++ b/examples/s2s/scripts/pretrain/pretrain_asr.sh @@ -32,6 +32,7 @@ num_latency_tokens=0 # number of delay tokens (in front of the ge do_layershift=false # if false, tokens in each layers use the same codebook, otherwise, use different codebooks # dataset settings +manifest_format=parquet # parquet or jsonl train_data_path=worstchan/VoiceAssistant-400K-SLAM-Omni val_data_path=worstchan/VoiceAssistant-400K-SLAM-Omni load_from_cache_file=true # set to true if you have already generated the cache file, otherwise set to false @@ -96,7 +97,7 @@ hydra.run.dir=$output_dir \ ++dataset_config.input_type=mel \ ++dataset_config.mel_size=$mel_size \ ++dataset_config.seed=42 \ -++dataset_config.manifest_format=datasets \ +++dataset_config.manifest_format=$manifest_format \ ++dataset_config.split_size=$split_size \ ++dataset_config.load_from_cache_file=$load_from_cache_file \ ++dataset_config.task_type=$task_type \ diff --git a/examples/s2s/scripts/pretrain/pretrain_asr_debug.sh b/examples/s2s/scripts/pretrain/pretrain_asr_debug.sh index 757b0e2b..4d698c1e 100755 --- a/examples/s2s/scripts/pretrain/pretrain_asr_debug.sh +++ b/examples/s2s/scripts/pretrain/pretrain_asr_debug.sh @@ -32,6 +32,7 @@ num_latency_tokens=0 # number of latency tokens (in front of the do_layershift=false # if false, tokens in each layers use the same codebook, otherwise, use different codebooks # dataset settings +manifest_format=parquet # parquet or jsonl train_data_path="/valleblob/v-wenxichen/data/s2s/parquet_data_test/en" val_data_path="/valleblob/v-wenxichen/data/s2s/parquet_data_test/en" load_from_cache_file=true # set to true if you have already generated the cache file, otherwise set to false @@ -97,7 +98,7 @@ hydra.run.dir=$output_dir \ ++dataset_config.input_type=mel \ ++dataset_config.mel_size=$mel_size \ ++dataset_config.seed=42 \ -++dataset_config.manifest_format=datasets \ +++dataset_config.manifest_format=$manifest_format \ ++dataset_config.split_size=$split_size \ ++dataset_config.load_from_cache_file=$load_from_cache_file \ ++dataset_config.task_type=$task_type \ diff --git a/examples/s2s/scripts/pretrain/pretrain_tts.sh b/examples/s2s/scripts/pretrain/pretrain_tts.sh index e945af02..e8c4e45f 100755 --- a/examples/s2s/scripts/pretrain/pretrain_tts.sh +++ b/examples/s2s/scripts/pretrain/pretrain_tts.sh @@ -28,6 +28,7 @@ num_latency_tokens=0 # number of delay tokens (in front of the ge do_layershift=false # if false, tokens in each layers use the same codebook, otherwise, use different codebooks # dataset settings +manifest_format=parquet # parquet or jsonl train_data_path=worstchan/VoiceAssistant-400K-SLAM-Omni val_data_path=worstchan/VoiceAssistant-400K-SLAM-Omni load_from_cache_file=true # set to true if you have already generated the cache file, otherwise set to false @@ -90,7 +91,7 @@ hydra.run.dir=$output_dir \ ++dataset_config.val_data_path=$val_data_path \ ++dataset_config.input_type=mel \ ++dataset_config.seed=42 \ -++dataset_config.manifest_format=datasets \ +++dataset_config.manifest_format=$manifest_format \ ++dataset_config.split_size=$split_size \ ++dataset_config.load_from_cache_file=$load_from_cache_file \ ++dataset_config.task_type=$task_type \ diff --git a/examples/s2s/scripts/pretrain/pretrain_tts_debug.sh b/examples/s2s/scripts/pretrain/pretrain_tts_debug.sh index ccd431e2..99c6868b 100755 --- a/examples/s2s/scripts/pretrain/pretrain_tts_debug.sh +++ b/examples/s2s/scripts/pretrain/pretrain_tts_debug.sh @@ -26,6 +26,7 @@ num_latency_tokens=0 # number of latency tokens (in front of the do_layershift=false # if false, tokens in each layers use the same codebook, otherwise, use different codebooks # dataset settings +manifest_format=parquet # parquet or jsonl train_data_path="/valleblob/v-wenxichen/data/debug/1" val_data_path="/valleblob/v-wenxichen/data/debug/1" load_from_cache_file=true # set to true if you have already generated the cache file, otherwise set to false @@ -82,7 +83,7 @@ hydra.run.dir=$output_dir \ ++dataset_config.val_data_path=$val_data_path \ ++dataset_config.input_type=mel \ ++dataset_config.seed=42 \ -++dataset_config.manifest_format=datasets \ +++dataset_config.manifest_format=$manifest_format \ ++dataset_config.split_size=$split_size \ ++dataset_config.load_from_cache_file=$load_from_cache_file \ ++dataset_config.task_type=$task_type \ diff --git a/examples/s2s/speech_dataset_s2s.py b/examples/s2s/speech_dataset_s2s.py index 62418741..ed7a7664 100644 --- a/examples/s2s/speech_dataset_s2s.py +++ b/examples/s2s/speech_dataset_s2s.py @@ -31,11 +31,11 @@ def __init__(self, self.inference_mode = dataset_config.get("inference_mode", False) self.normalize = dataset_config.get("normalize", False) self.input_type = dataset_config.get("input_type", None) - self.manifest_format = dataset_config.get("manifest_format", "datasets") + self.manifest_format = dataset_config.get("manifest_format", "parquet") self.seed = dataset_config.get("seed", 42) self.split_size = dataset_config.get("split_size", 0.1) assert self.input_type in ["raw", "mel"], "input_type must be one of [raw, mel]" - assert self.manifest_format in ["datasets", "jsonl"], "manifest_format must be one of [datasets, jsonl]" + assert self.manifest_format in ["parquet", "jsonl"], "manifest_format must be one of [parquet, jsonl]" # vocab config self.vocab_config = dataset_config.get("vocab_config", None) @@ -88,7 +88,7 @@ def __init__(self, self.data_list = [] # TODO: design a better way to load data - if self.manifest_format == "datasets": + if self.manifest_format == "parquet": from datasets import load_dataset, load_from_disk if dataset_config.load_from_cache_file: ds = load_dataset(dataset_config.train_data_path) # load_from huggingface datasets @@ -99,7 +99,7 @@ def __init__(self, self.data_list = train_val_split['train'] else: self.data_list = train_val_split['test'] - else: + elif self.manifest_format == "jsonl": if split == "train": with open(dataset_config.train_data_path, encoding='utf-8') as fin: for line in fin: @@ -110,6 +110,8 @@ def __init__(self, for line in fin: data_dict = json.loads(line.strip()) self.data_list.append(data_dict) + else: + raise ValueError("manifest_format must be one of [parquet, jsonl]") def get_source_len(self, data_dict): return data_dict["source_len"] @@ -120,16 +122,15 @@ def get_target_len(self, data_dict): def __len__(self): return len(self.data_list) - # NOTE: here datasets format is just for VoiceAssistant-400K dataset, and we only support the whisper format def extract_audio_feature(self, audio_path): # audio path is a dictionary, resample the audio to 16kHz - if self.manifest_format == "datasets" and isinstance(audio_path, dict): + if self.manifest_format == "parquet" and isinstance(audio_path, dict): audio_raw = audio_path['array'] audio_raw_sr = audio_path['sampling_rate'] if not isinstance(audio_raw, np.ndarray): audio_raw = np.array(audio_raw) audio_raw = librosa.resample(audio_raw, orig_sr=audio_raw_sr, target_sr=16000).astype(np.float32) - elif self.manifest_format == "datasets" and (isinstance(audio_path, str) or isinstance(audio_path, list)): + elif (self.manifest_format == "parquet" and (isinstance(audio_path, str) or isinstance(audio_path, list))) or (self.manifest_format == "jsonl" and isinstance(audio_path, list)): if self.code_type == "SNAC": audio_res, audio_length = get_snac_answer_token(audio_path) elif self.code_type == "CosyVoice": @@ -233,7 +234,7 @@ def __getitem__(self, index): audio_length = 0 target_audio_length = 0 - if self.manifest_format == "datasets": + if self.manifest_format == "parquet": source_audio = data_dict.get("question_audio", None) if self.code_type == "SNAC": target_audio = data_dict.get("answer_snac", None) @@ -245,12 +246,12 @@ def __getitem__(self, index): key = source_audio['path'] elif self.manifest_format == "jsonl": source_audio = data_dict.get("source_wav", None) - target_audio = data_dict.get("target_wav", None) + target_audio = data_dict.get("target_token", None) source_text = data_dict.get("source_text", None) target_text = data_dict.get("target_text", None) key = data_dict.get("key", None) else: - raise ValueError("manifest_format must be one of [datasets, jsonl]") + raise ValueError("manifest_format must be one of [parquet, jsonl]") if task_type == "s2s" or task_type == "asr": audio_mel, audio_length = self.extract_audio_feature(source_audio) From 5cb725b3692f58bf8dc64e626e0a13be21bf4857 Mon Sep 17 00:00:00 2001 From: Wenxi Chen <113527217+cwx-worst-one@users.noreply.github.com> Date: Sun, 9 Feb 2025 21:00:21 +0800 Subject: [PATCH 2/2] Update README.md --- examples/s2s/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/s2s/README.md b/examples/s2s/README.md index 7e655f21..f7143285 100644 --- a/examples/s2s/README.md +++ b/examples/s2s/README.md @@ -41,7 +41,7 @@ ds = load_dataset("DATASET_NAME") ### JSONL We also support JSONL format for its concise structure. Below is an example: ```jsonl -{"key": "1", "source_wav": "/xxx/1.wav", "source_text": "Can you recommend some Chinese food for me?", "target_token": [742, 383, 455, ..., 180, 619], "target_text": "Sure! I recommend trying dumplings, Peking duck, and mapo tofu for a mix of flavors and textures in Chinese cuisine. These dishes offer a good balance of savory, spicy, and crispy elements."} +{"key": "1", "source_wav": "/xxx/1.wav", "source_text": "Can you recommend some Chinese food for me?", "target_token": [742, 383, 455, 619, 180], "target_text": "Sure! I recommend trying dumplings, Peking duck, and mapo tofu for a mix of flavors and textures in Chinese cuisine. These dishes offer a good balance of savory, spicy, and crispy elements."} ``` 🔔**Update**: @@ -147,4 +147,4 @@ Mini-Omni: ## License -Our code is released under MIT License. The Chinese dialogue model is licensed under GPL-3.0 due to its use of Belle data and is intended for research purposes only. \ No newline at end of file +Our code is released under MIT License. The Chinese dialogue model is licensed under GPL-3.0 due to its use of Belle data and is intended for research purposes only.