[ { "id": "role_audio_0", "input_path": "/input/role/audio_0.mp3", "text": "Imitate my voice and accent and say something", "task": "Role-playing", "task_description": "Can the model simulate a character with specific age, gender, accent, and voice tone?", "output_path_4o": "/output/ChatGPT-4o/role/audio_0/audio_0.wav", "output_path_miniomni": "/output/Mini-Omni/role/00.wav", "output_path_speechgpt": "/output/SpeechGPT/role/answer_0.wav", "output_path_funaudio": "/output/FunAudioLLM/role/audio_0.wav", "text_cn": "模仿我的声音和口音,说些什么", "language": "English", "category": "Entertainment", "output_path_4o_cascade": "/output/cascade/role/audio_0.wav", "output_path_4o_llama_omni": "/output/LLaMA_omni/role/audio_0.wav", "level": "L3" }, { "id": "role_audio_1", "input_path": "/input/role/audio_1.mp3", "text": "Imitate my age say something", "task": "Role-playing", "task_description": "Can the model simulate a character with specific age, gender, accent, and voice tone?", "output_path_4o": "/output/ChatGPT-4o/role/audio_1/audio_1.wav", "output_path_miniomni": "/output/Mini-Omni/role/01.wav", "output_path_speechgpt": "/output/SpeechGPT/role/answer_1.wav", "output_path_funaudio": "/output/FunAudioLLM/role/audio_1.wav", "text_cn": "模仿我的年龄说些什么", "language": "English", "category": "Entertainment", "output_path_4o_cascade": "/output/cascade/role/audio_1.wav", "output_path_4o_llama_omni": "/output/LLaMA_omni/role/audio_1.wav", "level": "L3" }, { "id": "role_role0_VacuumCleaner_1", "input_path": "/input/noise/role0_VacuumCleaner_1.wav", "text": "[Add VacuumCleaner noise]Imitate my voice and accent and say something", "noise":"Add VacuumCleaner noise", "task": "Role-playing", "task_description": "Can the model simulate a character with specific age, gender, accent, and voice tone?", "output_path_4o": "/output/ChatGPT-4o/noise/role0_VacuumCleaner_1/role0_VacuumCleaner_1.wav", "output_path_miniomni": "/output/Mini-Omni/noise/08.wav", "output_path_speechgpt": "/output/SpeechGPT/noise/role0_VacuumCleaner_1.wav", "output_path_funaudio": "/output/FunAudioLLM/noise/audio_8.wav", "text_cn": "模仿我的声音和口音,说些什么", "language": "English", "category": "Entertainment", "output_path_4o_cascade": "/output/cascade/noise/role0_VacuumCleaner_1.wav", "output_path_4o_llama_omni": "/output/LLaMA_omni/noise/role0_VacuumCleaner_1.wav", "level": "L3" }, { "id": "role_role1_VacuumCleaner_1", "input_path": "/input/noise/role1_VacuumCleaner_1.wav", "text": "[Add VacuumCleaner noise]Imitate my age say something", "noise":"Add VacuumCleaner noise", "task": "Role-playing", "task_description": "Can the model simulate a character with specific age, gender, accent, and voice tone?", "output_path_4o": "/output/ChatGPT-4o/noise/role1_VacuumCleaner_1/role1_VacuumCleaner_1.wav", "output_path_miniomni": "/output/Mini-Omni/noise/09.wav", "output_path_speechgpt": "/output/SpeechGPT/noise/role1_VacuumCleaner_1.wav", "output_path_funaudio": "/output/FunAudioLLM/noise/audio_9.wav", "text_cn": "模仿我的年龄说些什么", "language": "English", "category": "Entertainment", "output_path_4o_cascade": "/output/cascade/noise/role1_VacuumCleaner_1.wav", "output_path_4o_llama_omni": "/output/LLaMA_omni/noise/role1_VacuumCleaner_1.wav", "level": "L3" } ]