S2S_Evaluation / data /tongue.json
KurtDu's picture
Upload 21 files
50ad069 verified
raw
history blame
9.75 kB
[
{
"id": "tongue_twisters_audio_0",
"input_path": "/input/tongue_twister/audio_0.mp3",
"text": "Say the following sentence clearly: \"She sells seashells by the seashore.\"",
"task": "Tongue twisters capabilities",
"task_description": "Can the model correctly pronounce a given tongue twister?",
"output_path_4o": "/output/ChatGPT-4o/tongue_twister/audio_0/audio_0.wav",
"output_path_miniomni": "/output/Mini-Omni/tongue_twister/00.wav",
"output_path_speechgpt": "/output/SpeechGPT/tongue_twister/answer_0.wav",
"output_path_funaudio": "/output/FunAudioLLM/tongue_twister/audio_0.wav",
"text_cn": "清楚地说:“她在海滨出售贝壳。”",
"language": "English",
"category": "Entertainment",
"output_path_4o_cascade": "/output/cascade/tongue_twister/audio_0.wav",
"output_path_4o_llama_omni": "/output/LLaMA_omni/tongue_twister/audio_0.wav",
"level": "L2"
},
{
"id": "tongue_twisters_audio_2",
"input_path": "/input/tongue_twister/audio_2.mp3",
"text": "Say this sentence clearly without any errors: \"Betty bought a bit of butter, but the butter Betty bought was bitter.\"",
"task": "Tongue twisters capabilities",
"task_description": "Can the model correctly pronounce a given tongue twister?",
"output_path_4o": "/output/ChatGPT-4o/tongue_twister/audio_2/audio_2.wav",
"output_path_miniomni": "/output/Mini-Omni/tongue_twister/02.wav",
"output_path_speechgpt": "/output/SpeechGPT/tongue_twister/answer_2.wav",
"output_path_funaudio": "/output/FunAudioLLM/tongue_twister/audio_2.wav",
"text_cn": "清楚地说这句话没有任何错误:“贝蒂买了一点黄油,但贝蒂买的黄油却很痛苦。”",
"language": "English",
"category": "Entertainment",
"output_path_4o_cascade": "/output/cascade/tongue_twister/audio_2.wav",
"output_path_4o_llama_omni": "/output/LLaMA_omni/tongue_twister/audio_2.wav",
"level": "L2"
},
{
"id": "tongue_twisters_audio_3",
"input_path": "/input/tongue_twister/audio_3.mp3",
"text": "Please say this tongue twister carefully: The sixth sick sheik's sixth sheep's sick.",
"task": "Tongue twisters capabilities",
"task_description": "Can the model correctly pronounce a given tongue twister?",
"output_path_4o": "/output/ChatGPT-4o/tongue_twister/audio_3/audio_3.wav",
"output_path_miniomni": "/output/Mini-Omni/tongue_twister/03.wav",
"output_path_speechgpt": "/output/SpeechGPT/tongue_twister/answer_3.wav",
"output_path_funaudio": "/output/FunAudioLLM/tongue_twister/audio_3.wav",
"text_cn": "请仔细地说出这种舌头的扭曲:第六个病酋长的第六只绵羊病了。",
"language": "English",
"category": "Entertainment",
"output_path_4o_cascade": "/output/cascade/tongue_twister/audio_3.wav",
"output_path_4o_llama_omni": "/output/LLaMA_omni/tongue_twister/audio_3.wav",
"level": "L2"
},
{
"id": "tongue_twisters_audio_4",
"input_path": "/input/tongue_twister/audio_4.mp3",
"text": "Say the following clearly and at a regular pace: \"How can a clam cram in a clean cream can?\"",
"task": "Tongue twisters capabilities",
"task_description": "Can the model correctly pronounce a given tongue twister?",
"output_path_4o": "/output/ChatGPT-4o/tongue_twister/audio_4/audio_4.wav",
"output_path_miniomni": "/output/Mini-Omni/tongue_twister/04.wav",
"output_path_speechgpt": "/output/SpeechGPT/tongue_twister/answer_4.wav",
"output_path_funaudio": "/output/FunAudioLLM/tongue_twister/audio_4.wav",
"text_cn": "清楚地说出以下速度:蛤lam塞在干净的奶油罐中如何?",
"language": "English",
"category": "Entertainment",
"output_path_4o_cascade": "/output/cascade/tongue_twister/audio_4.wav",
"output_path_4o_llama_omni": "/output/LLaMA_omni/tongue_twister/audio_4.wav",
"level": "L2"
},
{
"id": "tongue_twisters_audio_7",
"input_path": "/input/tongue_twister/audio_7.mp3",
"text": "Say this sentence quickly without losing clarity: \"A box of mixed biscuits, a mixed biscuit box.\"",
"task": "Tongue twisters capabilities",
"task_description": "Can the model correctly pronounce a given tongue twister?",
"output_path_4o": "/output/ChatGPT-4o/tongue_twister/audio_7/audio_7.wav",
"output_path_miniomni": "/output/Mini-Omni/tongue_twister/07.wav",
"output_path_speechgpt": "/output/SpeechGPT/tongue_twister/answer_7.wav",
"output_path_funaudio": "/output/FunAudioLLM/tongue_twister/audio_5.wav",
"text_cn": "迅速说出这句话而不会失去清晰度:一盒混合饼干,一个混合的饼干盒。",
"language": "English",
"category": "Entertainment",
"output_path_4o_cascade": "/output/cascade/tongue_twister/audio_7.wav",
"output_path_4o_llama_omni": "/output/LLaMA_omni/tongue_twister/audio_7.wav",
"level": "L2"
},
{
"id": "tongue_twisters_audio_9",
"input_path": "/input/tongue_twister/audio_9.mp3",
"text": "Say this sentence with proper intonation: \"He thrusts his fists against the posts and still insists he sees the ghosts.\"",
"task": "Tongue twisters capabilities",
"task_description": "Can the model correctly pronounce a given tongue twister?",
"output_path_4o": "/output/ChatGPT-4o/tongue_twister/audio_9/audio_9.wav",
"output_path_miniomni": "/output/Mini-Omni/tongue_twister/09.wav",
"output_path_speechgpt": "/output/SpeechGPT/tongue_twister/answer_9.wav",
"output_path_funaudio": "/output/FunAudioLLM/tongue_twister/audio_6.wav",
"text_cn": "用适当的语调说出这句话:他将拳头推向哨所,仍然坚持认为他看到鬼魂。",
"language": "English",
"category": "Entertainment",
"output_path_4o_cascade": "/output/cascade/tongue_twister/audio_9.wav",
"output_path_4o_llama_omni": "/output/LLaMA_omni/tongue_twister/audio_9.wav",
"level": "L2"
},
{
"id": "tongue_twisters_audio_10",
"input_path": "/input/tongue_twister/audio_10.mp3",
"text": "Say this sentence with clear emphasis on alliteration: \"Fred fed Ted bread and Ted fed Fred bread.\"",
"task": "Tongue twisters capabilities",
"task_description": "Can the model correctly pronounce a given tongue twister?",
"output_path_4o": "/output/ChatGPT-4o/tongue_twister/audio_10/audio_10.wav",
"output_path_miniomni": "/output/Mini-Omni/tongue_twister/10.wav",
"output_path_speechgpt": "/output/SpeechGPT/tongue_twister/answer_10.wav",
"output_path_funaudio": "/output/FunAudioLLM/tongue_twister/audio_1.wav",
"text_cn": "说出这句话,以明确的重视:弗雷德喂了塞德面包和泰德喂了弗雷德面包。",
"language": "English",
"category": "Entertainment",
"output_path_4o_cascade": "/output/cascade/tongue_twister/audio_10.wav",
"output_path_4o_llama_omni": "/output/LLaMA_omni/tongue_twister/audio_10.wav",
"level": "L2"
},
{
"id": "tongue_tongue_twister0_Neighbor_1",
"input_path": "/input/noise/tongue_twister0_Neighbor_1.wav",
"text": "[Add Neighbor noise]Say the following sentence clearly: \"She sells seashells by the seashore.\"",
"noise":"Add Neighbor noise",
"task": "Tongue twisters capabilities",
"task_description": "Can the model correctly pronounce a given tongue twister?",
"output_path_4o": "/output/ChatGPT-4o/noise/tongue_twister0_Neighbor_1/tongue_twister0_Neighbor_1.wav",
"output_path_miniomni": "/output/Mini-Omni/noise/14.wav",
"output_path_speechgpt": "/output/SpeechGPT/noise/tongue_twister0_Neighbor_1.wav",
"output_path_funaudio": "/output/FunAudioLLM/noise/audio_14.wav",
"text_cn": "清楚地说:“她在海滨出售贝壳。”",
"language": "English",
"category": "Entertainment",
"output_path_4o_cascade": "/output/cascade/noise/tongue_twister0_Neighbor_1.wav",
"output_path_4o_llama_omni": "/output/LLaMA_omni/noise/tongue_twister0_Neighbor_1.wav",
"level": "L2"
},
{
"id": "tongue_tongue_twister2_Neighbor_1",
"input_path": "/input/noise/tongue_twister2_Neighbor_1.wav",
"text": "[Add Neighbor noise]Say this sentence clearly without any errors: \"Betty bought a bit of butter, but the butter Betty bought was bitter.\"",
"noise":"Add Neighbor noise",
"task": "Tongue twisters capabilities",
"task_description": "Can the model correctly pronounce a given tongue twister?",
"output_path_4o": "/output/ChatGPT-4o/noise/tongue_twister2_Neighbor_1/tongue_twister2_Neighbor_1.wav",
"output_path_miniomni": "/output/Mini-Omni/noise/15.wav",
"output_path_speechgpt": "/output/SpeechGPT/noise/tongue_twister2_Neighbor_1.wav",
"output_path_funaudio": "/output/FunAudioLLM/noise/audio_15.wav",
"text_cn": "清楚地说这句话没有任何错误:“贝蒂买了一点黄油,但贝蒂买的黄油却很痛苦。”",
"language": "English",
"category": "Entertainment",
"output_path_4o_cascade": "/output/cascade/noise/tongue_twister2_Neighbor_1.wav",
"output_path_4o_llama_omni": "/output/LLaMA_omni/noise/tongue_twister2_Neighbor_1.wav",
"level": "L2"
}
]