|
[ |
|
{ |
|
"id": "pause_pause1", |
|
"input_path": "/input/pause/pause1.wav", |
|
"text": "重复一遍“我一把/把把/把住了”这句话", |
|
"task": "Pause and segmentation", |
|
"task_description": "Can the model accurately pause and segment in ambiguous cases?", |
|
"output_path_4o": "/output/ChatGPT-4o/pause/pause1/pause1.wav", |
|
"output_path_miniomni": "/output/Mini-Omni/pause/00.wav", |
|
"output_path_speechgpt": "/output/SpeechGPT/pause/pause1.wav", |
|
"output_path_funaudio": "/output/FunAudioLLM/pause/audio_0.wav", |
|
"language": "Chinese", |
|
"category": "Education", |
|
"output_path_4o_cascade": "/output/cascade/pause/pause1.wav", |
|
"output_path_4o_llama_omni": "/output/LLaMA_omni/pause/pause1.wav", |
|
"level": "L3" |
|
}, |
|
{ |
|
"id": "pause_pause2", |
|
"input_path": "/input/pause/pause2.wav", |
|
"text": "“我一把/把把/把住了”你知道是什么意思吗?", |
|
"task": "Pause and segmentation", |
|
"task_description": "Can the model accurately pause and segment in ambiguous cases?", |
|
"output_path_4o": "/output/ChatGPT-4o/pause/pause2/pause2.wav", |
|
"output_path_miniomni": "/output/Mini-Omni/pause/01.wav", |
|
"output_path_speechgpt": "/output/SpeechGPT/pause/pause2.wav", |
|
"output_path_funaudio": "/output/FunAudioLLM/pause/audio_1.wav", |
|
"language": "Chinese", |
|
"category": "Education", |
|
"output_path_4o_cascade": "/output/cascade/pause/pause2.wav", |
|
"output_path_4o_llama_omni": "/output/LLaMA_omni/pause/pause2.wav", |
|
"level": "L1" |
|
}, |
|
{ |
|
"id": "pause_pause3", |
|
"input_path": "/input/pause/pause3.wav", |
|
"text": "你知道下面这句话是什么意思吗?“昨天晚上小偷/偷偷/偷了我的电脑。”,并用明显的停顿重复一遍。", |
|
"task": "Pause and segmentation", |
|
"task_description": "Can the model accurately pause and segment in ambiguous cases?", |
|
"output_path_4o": "/output/ChatGPT-4o/pause/pause3/pause3.wav", |
|
"output_path_miniomni": "/output/Mini-Omni/pause/02.wav", |
|
"output_path_speechgpt": "/output/SpeechGPT/pause/pause3.wav", |
|
"output_path_funaudio": "/output/FunAudioLLM/pause/audio_2.wav", |
|
"language": "Chinese", |
|
"category": "Education", |
|
"output_path_4o_cascade": "/output/cascade/pause/pause3.wav", |
|
"output_path_4o_llama_omni": "/output/LLaMA_omni/pause/pause3.wav", |
|
"level": "L3" |
|
}, |
|
{ |
|
"id": "pause_pause4", |
|
"input_path": "/input/pause/pause4.wav", |
|
"text": "下面第一个句子还是第二个句子的停顿是正确的?“南京市/长江大桥欢迎您’和‘南京市长/江大桥欢迎您’”", |
|
"task": "Pause and segmentation", |
|
"task_description": "Can the model accurately pause and segment in ambiguous cases?", |
|
"output_path_4o": "/output/ChatGPT-4o/pause/pause4/pause4.wav", |
|
"output_path_miniomni": "/output/Mini-Omni/pause/03.wav", |
|
"output_path_speechgpt": "/output/SpeechGPT/pause/pause4.wav", |
|
"output_path_funaudio": "/output/FunAudioLLM/pause/audio_3.wav", |
|
"language": "Chinese", |
|
"category": "Education", |
|
"output_path_4o_cascade": "/output/cascade/pause/pause4.wav", |
|
"output_path_4o_llama_omni": "/output/LLaMA_omni/pause/pause4.wav", |
|
"level": "L1" |
|
} |
|
] |