[ { "id": "tongue_twisters_audio_0", "input_path": "/input/tongue_twister/audio_0.mp3", "text": "Say the following sentence clearly: \"She sells seashells by the seashore.\"", "task": "Tongue twisters capabilities", "task_description": "Can the model correctly pronounce a given tongue twister?", "output_path_4o": "/output/ChatGPT-4o/tongue_twister/audio_0/audio_0.wav", "output_path_miniomni": "/output/Mini-Omni/tongue_twister/00.wav", "output_path_speechgpt": "/output/SpeechGPT/tongue_twister/answer_0.wav", "output_path_funaudio": "/output/FunAudioLLM/tongue_twister/audio_0.wav", "text_cn": "清楚地说:“她在海滨出售贝壳。”", "language": "English", "category": "Entertainment", "output_path_4o_cascade": "/output/cascade/tongue_twister/audio_0.wav", "output_path_4o_llama_omni": "/output/LLaMA_omni/tongue_twister/audio_0.wav", "level": "L2" }, { "id": "tongue_twisters_audio_2", "input_path": "/input/tongue_twister/audio_2.mp3", "text": "Say this sentence clearly without any errors: \"Betty bought a bit of butter, but the butter Betty bought was bitter.\"", "task": "Tongue twisters capabilities", "task_description": "Can the model correctly pronounce a given tongue twister?", "output_path_4o": "/output/ChatGPT-4o/tongue_twister/audio_2/audio_2.wav", "output_path_miniomni": "/output/Mini-Omni/tongue_twister/02.wav", "output_path_speechgpt": "/output/SpeechGPT/tongue_twister/answer_2.wav", "output_path_funaudio": "/output/FunAudioLLM/tongue_twister/audio_2.wav", "text_cn": "清楚地说这句话没有任何错误:“贝蒂买了一点黄油,但贝蒂买的黄油却很痛苦。”", "language": "English", "category": "Entertainment", "output_path_4o_cascade": "/output/cascade/tongue_twister/audio_2.wav", "output_path_4o_llama_omni": "/output/LLaMA_omni/tongue_twister/audio_2.wav", "level": "L2" }, { "id": "tongue_twisters_audio_3", "input_path": "/input/tongue_twister/audio_3.mp3", "text": "Please say this tongue twister carefully: The sixth sick sheik's sixth sheep's sick.", "task": "Tongue twisters capabilities", "task_description": "Can the model 
correctly pronounce a given tongue twister?", "output_path_4o": "/output/ChatGPT-4o/tongue_twister/audio_3/audio_3.wav", "output_path_miniomni": "/output/Mini-Omni/tongue_twister/03.wav", "output_path_speechgpt": "/output/SpeechGPT/tongue_twister/answer_3.wav", "output_path_funaudio": "/output/FunAudioLLM/tongue_twister/audio_3.wav", "text_cn": "请仔细地说出这种舌头的扭曲:第六个病酋长的第六只绵羊病了。", "language": "English", "category": "Entertainment", "output_path_4o_cascade": "/output/cascade/tongue_twister/audio_3.wav", "output_path_4o_llama_omni": "/output/LLaMA_omni/tongue_twister/audio_3.wav", "level": "L2" }, { "id": "tongue_twisters_audio_4", "input_path": "/input/tongue_twister/audio_4.mp3", "text": "Say the following clearly and at a regular pace: \"How can a clam cram in a clean cream can?\"", "task": "Tongue twisters capabilities", "task_description": "Can the model correctly pronounce a given tongue twister?", "output_path_4o": "/output/ChatGPT-4o/tongue_twister/audio_4/audio_4.wav", "output_path_miniomni": "/output/Mini-Omni/tongue_twister/04.wav", "output_path_speechgpt": "/output/SpeechGPT/tongue_twister/answer_4.wav", "output_path_funaudio": "/output/FunAudioLLM/tongue_twister/audio_4.wav", "text_cn": "以正常语速清楚地说出以下句子:“蛤蜊怎么能塞进干净的奶油罐里?”", "language": "English", "category": "Entertainment", "output_path_4o_cascade": "/output/cascade/tongue_twister/audio_4.wav", "output_path_4o_llama_omni": "/output/LLaMA_omni/tongue_twister/audio_4.wav", "level": "L2" }, { "id": "tongue_twisters_audio_7", "input_path": "/input/tongue_twister/audio_7.mp3", "text": "Say this sentence quickly without losing clarity: \"A box of mixed biscuits, a mixed biscuit box.\"", "task": "Tongue twisters capabilities", "task_description": "Can the model correctly pronounce a given tongue twister?", "output_path_4o": "/output/ChatGPT-4o/tongue_twister/audio_7/audio_7.wav", "output_path_miniomni": "/output/Mini-Omni/tongue_twister/07.wav", "output_path_speechgpt": "/output/SpeechGPT/tongue_twister/answer_7.wav", 
"output_path_funaudio": "/output/FunAudioLLM/tongue_twister/audio_5.wav", "text_cn": "迅速说出这句话而不会失去清晰度:一盒混合饼干,一个混合的饼干盒。", "language": "English", "category": "Entertainment", "output_path_4o_cascade": "/output/cascade/tongue_twister/audio_7.wav", "output_path_4o_llama_omni": "/output/LLaMA_omni/tongue_twister/audio_7.wav", "level": "L2" }, { "id": "tongue_twisters_audio_9", "input_path": "/input/tongue_twister/audio_9.mp3", "text": "Say this sentence with proper intonation: \"He thrusts his fists against the posts and still insists he sees the ghosts.\"", "task": "Tongue twisters capabilities", "task_description": "Can the model correctly pronounce a given tongue twister?", "output_path_4o": "/output/ChatGPT-4o/tongue_twister/audio_9/audio_9.wav", "output_path_miniomni": "/output/Mini-Omni/tongue_twister/09.wav", "output_path_speechgpt": "/output/SpeechGPT/tongue_twister/answer_9.wav", "output_path_funaudio": "/output/FunAudioLLM/tongue_twister/audio_6.wav", "text_cn": "用适当的语调说出这句话:他将拳头推向哨所,仍然坚持认为他看到鬼魂。", "language": "English", "category": "Entertainment", "output_path_4o_cascade": "/output/cascade/tongue_twister/audio_9.wav", "output_path_4o_llama_omni": "/output/LLaMA_omni/tongue_twister/audio_9.wav", "level": "L2" }, { "id": "tongue_twisters_audio_10", "input_path": "/input/tongue_twister/audio_10.mp3", "text": "Say this sentence with clear emphasis on alliteration: \"Fred fed Ted bread and Ted fed Fred bread.\"", "task": "Tongue twisters capabilities", "task_description": "Can the model correctly pronounce a given tongue twister?", "output_path_4o": "/output/ChatGPT-4o/tongue_twister/audio_10/audio_10.wav", "output_path_miniomni": "/output/Mini-Omni/tongue_twister/10.wav", "output_path_speechgpt": "/output/SpeechGPT/tongue_twister/answer_10.wav", "output_path_funaudio": "/output/FunAudioLLM/tongue_twister/audio_1.wav", "text_cn": "说出这句话,并清楚地强调头韵:“弗雷德喂泰德面包,泰德喂弗雷德面包。”", "language": "English", "category": "Entertainment", "output_path_4o_cascade": 
"/output/cascade/tongue_twister/audio_10.wav", "output_path_4o_llama_omni": "/output/LLaMA_omni/tongue_twister/audio_10.wav", "level": "L2" }, { "id": "tongue_tongue_twister0_Neighbor_1", "input_path": "/input/noise/tongue_twister0_Neighbor_1.wav", "text": "[Add Neighbor noise]Say the following sentence clearly: \"She sells seashells by the seashore.\"", "noise":"Add Neighbor noise", "task": "Tongue twisters capabilities", "task_description": "Can the model correctly pronounce a given tongue twister?", "output_path_4o": "/output/ChatGPT-4o/noise/tongue_twister0_Neighbor_1/tongue_twister0_Neighbor_1.wav", "output_path_miniomni": "/output/Mini-Omni/noise/14.wav", "output_path_speechgpt": "/output/SpeechGPT/noise/tongue_twister0_Neighbor_1.wav", "output_path_funaudio": "/output/FunAudioLLM/noise/audio_14.wav", "text_cn": "清楚地说:“她在海滨出售贝壳。”", "language": "English", "category": "Entertainment", "output_path_4o_cascade": "/output/cascade/noise/tongue_twister0_Neighbor_1.wav", "output_path_4o_llama_omni": "/output/LLaMA_omni/noise/tongue_twister0_Neighbor_1.wav", "level": "L2" }, { "id": "tongue_tongue_twister2_Neighbor_1", "input_path": "/input/noise/tongue_twister2_Neighbor_1.wav", "text": "[Add Neighbor noise]Say this sentence clearly without any errors: \"Betty bought a bit of butter, but the butter Betty bought was bitter.\"", "noise":"Add Neighbor noise", "task": "Tongue twisters capabilities", "task_description": "Can the model correctly pronounce a given tongue twister?", "output_path_4o": "/output/ChatGPT-4o/noise/tongue_twister2_Neighbor_1/tongue_twister2_Neighbor_1.wav", "output_path_miniomni": "/output/Mini-Omni/noise/15.wav", "output_path_speechgpt": "/output/SpeechGPT/noise/tongue_twister2_Neighbor_1.wav", "output_path_funaudio": "/output/FunAudioLLM/noise/audio_15.wav", "text_cn": "清楚地说这句话没有任何错误:“贝蒂买了一点黄油,但贝蒂买的黄油却很痛苦。”", "language": "English", "category": "Entertainment", "output_path_4o_cascade": "/output/cascade/noise/tongue_twister2_Neighbor_1.wav", 
"output_path_4o_llama_omni": "/output/LLaMA_omni/noise/tongue_twister2_Neighbor_1.wav", "level": "L2" } ]