Spaces:
Sleeping
Sleeping
[ | |
{ | |
"id": "emotion_audio_0", | |
"input_path": "/input/emotion/audio_0.wav", | |
"text": "[emotion: happy]Kids are talking by the door", | |
"task": "Emotion recognition and expression", | |
"task_description": "Can the model recognize emotions and provide appropriate responses based on different emotions?", | |
"output_path_4o": "/output/ChatGPT-4o/emotion/audio_0/audio_0.wav", | |
"output_path_miniomni": "/output/Mini-Omni/emotion/00.wav", | |
"output_path_speechgpt": "/output/SpeechGPT/emotion/answer_0.wav", | |
"output_path_funaudio": "/output/FunAudioLLM/emotion/audio_0.wav", | |
"text_cn": "孩子们在门旁说话", | |
"language": "English", | |
"category": "Social Companionship", | |
"output_path_4o_cascade": "/output/cascade/emotion/audio_0.wav", | |
"output_path_4o_llama_omni": "/output/LLaMA_omni/emotion/audio_0.wav", | |
"level": "L3" | |
}, | |
{ | |
"id": "emotion_audio_1", | |
"input_path": "/input/emotion/audio_1.wav", | |
"text": "[emotion: sad]Kids are talking by the door", | |
"task": "Emotion recognition and expression", | |
"task_description": "Can the model recognize emotions and provide appropriate responses based on different emotions?", | |
"output_path_4o": "/output/ChatGPT-4o/emotion/audio_1/audio_1.wav", | |
"output_path_miniomni": "/output/Mini-Omni/emotion/01.wav", | |
"output_path_speechgpt": "/output/SpeechGPT/emotion/answer_1.wav", | |
"output_path_funaudio": "/output/FunAudioLLM/emotion/audio_1.wav", | |
"text_cn": "孩子们在门旁说话", | |
"language": "English", | |
"category": "Social Companionship", | |
"output_path_4o_cascade": "/output/cascade/emotion/audio_1.wav", | |
"output_path_4o_llama_omni": "/output/LLaMA_omni/emotion/audio_1.wav", | |
"level": "L3" | |
}, | |
{ | |
"id": "emotion_audio_2", | |
"input_path": "/input/emotion/audio_2.wav", | |
"text": "[emotion: angry]Kids are talking by the door", | |
"task": "Emotion recognition and expression", | |
"task_description": "Can the model recognize emotions and provide appropriate responses based on different emotions?", | |
"output_path_4o": "/output/ChatGPT-4o/emotion/audio_2/audio_2.wav", | |
"output_path_miniomni": "/output/Mini-Omni/emotion/02.wav", | |
"output_path_speechgpt": "/output/SpeechGPT/emotion/answer_2.wav", | |
"output_path_funaudio": "/output/FunAudioLLM/emotion/audio_2.wav", | |
"text_cn": "孩子们在门旁说话", | |
"language": "English", | |
"category": "Social Companionship", | |
"output_path_4o_cascade": "/output/cascade/emotion/audio_2.wav", | |
"output_path_4o_llama_omni": "/output/LLaMA_omni/emotion/audio_2.wav", | |
"level": "L3" | |
}, | |
{ | |
"id": "emotion_audio_3", | |
"input_path": "/input/emotion/audio_3.wav", | |
"text": "[emotion: fearful]Kids are talking by the door", | |
"task": "Emotion recognition and expression", | |
"task_description": "Can the model recognize emotions and provide appropriate responses based on different emotions?", | |
"output_path_4o": "/output/ChatGPT-4o/emotion/audio_3/audio_3.wav", | |
"output_path_miniomni": "/output/Mini-Omni/emotion/03.wav", | |
"output_path_speechgpt": "/output/SpeechGPT/emotion/answer_3.wav", | |
"output_path_funaudio": "/output/FunAudioLLM/emotion/audio_3.wav", | |
"text_cn": "孩子们在门旁说话", | |
"language": "English", | |
"category": "Social Companionship", | |
"output_path_4o_cascade": "/output/cascade/emotion/audio_3.wav", | |
"output_path_4o_llama_omni": "/output/LLaMA_omni/emotion/audio_3.wav", | |
"level": "L3" | |
}, | |
{ | |
"id": "emotion_audio_4", | |
"input_path": "/input/emotion/audio_4.wav", | |
"text": "[emotion: disgust]Kids are talking by the door", | |
"task": "Emotion recognition and expression", | |
"task_description": "Can the model recognize emotions and provide appropriate responses based on different emotions?", | |
"output_path_4o": "/output/ChatGPT-4o/emotion/audio_4/audio_4.wav", | |
"output_path_miniomni": "/output/Mini-Omni/emotion/04.wav", | |
"output_path_speechgpt": "/output/SpeechGPT/emotion/answer_4.wav", | |
"output_path_funaudio": "/output/FunAudioLLM/emotion/audio_4.wav", | |
"text_cn": "孩子们在门旁说话", | |
"language": "English", | |
"category": "Social Companionship", | |
"output_path_4o_cascade": "/output/cascade/emotion/audio_4.wav", | |
"output_path_4o_llama_omni": "/output/LLaMA_omni/emotion/audio_4.wav", | |
"level": "L3" | |
}, | |
{ | |
"id": "emotion_audio_5", | |
"input_path": "/input/emotion/audio_5.wav", | |
"text": "[emotion: surprised]Kids are talking by the door", | |
"task": "Emotion recognition and expression", | |
"task_description": "Can the model recognize emotions and provide appropriate responses based on different emotions?", | |
"output_path_4o": "/output/ChatGPT-4o/emotion/audio_5/audio_5.wav", | |
"output_path_miniomni": "/output/Mini-Omni/emotion/05.wav", | |
"output_path_speechgpt": "/output/SpeechGPT/emotion/answer_5.wav", | |
"output_path_funaudio": "/output/FunAudioLLM/emotion/audio_5.wav", | |
"text_cn": "孩子们在门旁说话", | |
"language": "English", | |
"category": "Social Companionship", | |
"output_path_4o_cascade": "/output/cascade/emotion/audio_5.wav", | |
"output_path_4o_llama_omni": "/output/LLaMA_omni/emotion/audio_5.wav", | |
"level": "L3" | |
}, | |
{ | |
"id": "emotion_audio_6", | |
"input_path": "/input/emotion/audio_6.wav", | |
"text": "[emotion: happy]Dogs are sitting by the door", | |
"task": "Emotion recognition and expression", | |
"task_description": "Can the model recognize emotions and provide appropriate responses based on different emotions?", | |
"output_path_4o": "/output/ChatGPT-4o/emotion/audio_6/audio_6.wav", | |
"output_path_miniomni": "/output/Mini-Omni/emotion/06.wav", | |
"output_path_speechgpt": "/output/SpeechGPT/emotion/answer_6.wav", | |
"output_path_funaudio": "/output/FunAudioLLM/emotion/audio_6.wav", | |
"text_cn": "狗坐在门旁", | |
"language": "English", | |
"category": "Social Companionship", | |
"output_path_4o_cascade": "/output/cascade/emotion/audio_6.wav", | |
"output_path_4o_llama_omni": "/output/LLaMA_omni/emotion/audio_6.wav", | |
"level": "L3" | |
}, | |
{ | |
"id": "emotion_audio_7", | |
"input_path": "/input/emotion/audio_7.wav", | |
"text": "[emotion: sad]Dogs are sitting by the door", | |
"task": "Emotion recognition and expression", | |
"task_description": "Can the model recognize emotions and provide appropriate responses based on different emotions?", | |
"output_path_4o": "/output/ChatGPT-4o/emotion/audio_7/audio_7.wav", | |
"output_path_miniomni": "/output/Mini-Omni/emotion/07.wav", | |
"output_path_speechgpt": "/output/SpeechGPT/emotion/answer_7.wav", | |
"output_path_funaudio": "/output/FunAudioLLM/emotion/audio_7.wav", | |
"text_cn": "狗坐在门旁", | |
"language": "English", | |
"category": "Social Companionship", | |
"output_path_4o_cascade": "/output/cascade/emotion/audio_7.wav", | |
"output_path_4o_llama_omni": "/output/LLaMA_omni/emotion/audio_7.wav", | |
"level": "L3" | |
}, | |
{ | |
"id": "emotion_audio_8", | |
"input_path": "/input/emotion/audio_8.wav", | |
"text": "[emotion: angry]Dogs are sitting by the door", | |
"task": "Emotion recognition and expression", | |
"task_description": "Can the model recognize emotions and provide appropriate responses based on different emotions?", | |
"output_path_4o": "/output/ChatGPT-4o/emotion/audio_8/audio_8.wav", | |
"output_path_miniomni": "/output/Mini-Omni/emotion/08.wav", | |
"output_path_speechgpt": "/output/SpeechGPT/emotion/answer_8.wav", | |
"output_path_funaudio": "/output/FunAudioLLM/emotion/audio_8.wav", | |
"text_cn": "狗坐在门旁", | |
"language": "English", | |
"category": "Social Companionship", | |
"output_path_4o_cascade": "/output/cascade/emotion/audio_8.wav", | |
"output_path_4o_llama_omni": "/output/LLaMA_omni/emotion/audio_8.wav", | |
"level": "L3" | |
}, | |
{ | |
"id": "emotion_audio_9", | |
"input_path": "/input/emotion/audio_9.wav", | |
"text": "[emotion: fearful]Dogs are sitting by the door", | |
"task": "Emotion recognition and expression", | |
"task_description": "Can the model recognize emotions and provide appropriate responses based on different emotions?", | |
"output_path_4o": "/output/ChatGPT-4o/emotion/audio_9/audio_9.wav", | |
"output_path_miniomni": "/output/Mini-Omni/emotion/09.wav", | |
"output_path_speechgpt": "/output/SpeechGPT/emotion/answer_9.wav", | |
"output_path_funaudio": "/output/FunAudioLLM/emotion/audio_9.wav", | |
"text_cn": "狗坐在门旁", | |
"language": "English", | |
"category": "Social Companionship", | |
"output_path_4o_cascade": "/output/cascade/emotion/audio_9.wav", | |
"output_path_4o_llama_omni": "/output/LLaMA_omni/emotion/audio_9.wav", | |
"level": "L3" | |
}, | |
{ | |
"id": "emotion_audio_10", | |
"input_path": "/input/emotion/audio_10.wav", | |
"text": "[emotion: disgust]Dogs are sitting by the door", | |
"task": "Emotion recognition and expression", | |
"task_description": "Can the model recognize emotions and provide appropriate responses based on different emotions?", | |
"output_path_4o": "/output/ChatGPT-4o/emotion/audio_10/audio_10.wav", | |
"output_path_miniomni": "/output/Mini-Omni/emotion/10.wav", | |
"output_path_speechgpt": "/output/SpeechGPT/emotion/answer_10.wav", | |
"output_path_funaudio": "/output/FunAudioLLM/emotion/audio_10.wav", | |
"text_cn": "狗坐在门旁", | |
"language": "English", | |
"category": "Social Companionship", | |
"output_path_4o_cascade": "/output/cascade/emotion/audio_10.wav", | |
"output_path_4o_llama_omni": "/output/LLaMA_omni/emotion/audio_10.wav", | |
"level": "L3" | |
}, | |
{ | |
"id": "emotion_audio_11", | |
"input_path": "/input/emotion/audio_11.wav", | |
"text": "[emotion: surprised]Dogs are sitting by the door", | |
"task": "Emotion recognition and expression", | |
"task_description": "Can the model recognize emotions and provide appropriate responses based on different emotions?", | |
"output_path_4o": "/output/ChatGPT-4o/emotion/audio_11/audio_11.wav", | |
"output_path_miniomni": "/output/Mini-Omni/emotion/11.wav", | |
"output_path_speechgpt": "/output/SpeechGPT/emotion/answer_11.wav", | |
"output_path_funaudio": "/output/FunAudioLLM/emotion/audio_11.wav", | |
"text_cn": "狗坐在门旁", | |
"language": "English", | |
"category": "Social Companionship", | |
"output_path_4o_cascade": "/output/cascade/emotion/audio_11.wav", | |
"output_path_4o_llama_omni": "/output/LLaMA_omni/emotion/audio_11.wav", | |
"level": "L3" | |
}, | |
{ | |
"id": "emotion_emotion2-1", | |
"input_path": "/input/emotion/emotion2-1.wav", | |
"text": "[emotion: happy]What should I do now?", | |
"task": "Emotion recognition and expression", | |
"task_description": "Can the model recognize emotions and provide appropriate responses based on different emotions?", | |
"output_path_4o": "/output/ChatGPT-4o/emotion/emotion2-1/emotion2-1.wav", | |
"output_path_miniomni": "/output/Mini-Omni/emotion/13.wav", | |
"output_path_speechgpt": "/output/SpeechGPT/emotion/emotion2-1.wav", | |
"output_path_funaudio": "/output/FunAudioLLM/emotion/audio_13.wav", | |
"text_cn": "我现在该怎么办?", | |
"language": "English", | |
"category": "Social Companionship", | |
"output_path_4o_cascade": "/output/cascade/emotion/emotion2-1.wav", | |
"output_path_4o_llama_omni": "/output/LLaMA_omni/emotion/emotion2-1.wav", | |
"level": "L3" | |
}, | |
{ | |
"id": "emotion_emotion2-2", | |
"input_path": "/input/emotion/emotion2-2.wav", | |
"text": "[emotion: sad]What should I do now?", | |
"task": "Emotion recognition and expression", | |
"task_description": "Can the model recognize emotions and provide appropriate responses based on different emotions?", | |
"output_path_4o": "/output/ChatGPT-4o/emotion/emotion2-2/emotion2-2.wav", | |
"output_path_miniomni": "/output/Mini-Omni/emotion/14.wav", | |
"output_path_speechgpt": "/output/SpeechGPT/emotion/emotion2-2.wav", | |
"output_path_funaudio": "/output/FunAudioLLM/emotion/audio_14.wav", | |
"text_cn": "我现在该怎么办?", | |
"language": "English", | |
"category": "Social Companionship", | |
"output_path_4o_cascade": "/output/cascade/emotion/emotion2-2.wav", | |
"output_path_4o_llama_omni": "/output/LLaMA_omni/emotion/emotion2-2.wav", | |
"level": "L3" | |
}, | |
{ | |
"id": "emotion_emotion2-3", | |
"input_path": "/input/emotion/emotion2-3.wav", | |
"text": "[emotion: angry]What should I do now?", | |
"task": "Emotion recognition and expression", | |
"task_description": "Can the model recognize emotions and provide appropriate responses based on different emotions?", | |
"output_path_4o": "/output/ChatGPT-4o/emotion/emotion2-3/emotion2-3.wav", | |
"output_path_miniomni": "/output/Mini-Omni/emotion/15.wav", | |
"output_path_speechgpt": "/output/SpeechGPT/emotion/emotion2-3.wav", | |
"output_path_funaudio": "/output/FunAudioLLM/emotion/audio_15.wav", | |
"text_cn": "我现在该怎么办?", | |
"language": "English", | |
"category": "Social Companionship", | |
"output_path_4o_cascade": "/output/cascade/emotion/emotion2-3.wav", | |
"output_path_4o_llama_omni": "/output/LLaMA_omni/emotion/emotion2-3.wav", | |
"level": "L3" | |
}, | |
{ | |
"id": "emotion_emotion2-4", | |
"input_path": "/input/emotion/emotion2-4.wav", | |
"text": "[emotion: fearful]What should I do now?", | |
"task": "Emotion recognition and expression", | |
"task_description": "Can the model recognize emotions and provide appropriate responses based on different emotions?", | |
"output_path_4o": "/output/ChatGPT-4o/emotion/emotion2-4/emotion2-4.wav", | |
"output_path_miniomni": "/output/Mini-Omni/emotion/16.wav", | |
"output_path_speechgpt": "/output/SpeechGPT/emotion/emotion2-4.wav", | |
"output_path_funaudio": "/output/FunAudioLLM/emotion/audio_16.wav", | |
"text_cn": "我现在该怎么办?", | |
"language": "English", | |
"category": "Social Companionship", | |
"output_path_4o_cascade": "/output/cascade/emotion/emotion2-4.wav", | |
"output_path_4o_llama_omni": "/output/LLaMA_omni/emotion/emotion2-4.wav", | |
"level": "L3" | |
}, | |
{ | |
"id": "emotion_emotion3-1", | |
"input_path": "/input/emotion/emotion3-1.wav", | |
"text": "[emotion: happy]I really wish things could be different.", | |
"task": "Emotion recognition and expression", | |
"task_description": "Can the model recognize emotions and provide appropriate responses based on different emotions?", | |
"output_path_4o": "/output/ChatGPT-4o/emotion/emotion3-1/emotion3-1.wav", | |
"output_path_miniomni": "/output/Mini-Omni/emotion/17.wav", | |
"output_path_speechgpt": "/output/SpeechGPT/emotion/emotion3-1.wav", | |
"output_path_funaudio": "/output/FunAudioLLM/emotion/audio_17.wav", | |
"text_cn": "我真希望事情能够有所不同。", | |
"language": "English", | |
"category": "Social Companionship", | |
"output_path_4o_cascade": "/output/cascade/emotion/emotion3-1.wav", | |
"output_path_4o_llama_omni": "/output/LLaMA_omni/emotion/emotion3-1.wav", | |
"level": "L3" | |
}, | |
{ | |
"id": "emotion_emotion3-2", | |
"input_path": "/input/emotion/emotion3-2.wav", | |
"text": "[emotion: sad]I really wish things could be different.", | |
"task": "Emotion recognition and expression", | |
"task_description": "Can the model recognize emotions and provide appropriate responses based on different emotions?", | |
"output_path_4o": "/output/ChatGPT-4o/emotion/emotion3-2/emotion3-2.wav", | |
"output_path_miniomni": "/output/Mini-Omni/emotion/18.wav", | |
"output_path_speechgpt": "/output/SpeechGPT/emotion/emotion3-2.wav", | |
"output_path_funaudio": "/output/FunAudioLLM/emotion/audio_18.wav", | |
"text_cn": "我真希望事情能够有所不同。", | |
"language": "English", | |
"category": "Social Companionship", | |
"output_path_4o_cascade": "/output/cascade/emotion/emotion3-2.wav", | |
"output_path_4o_llama_omni": "/output/LLaMA_omni/emotion/emotion3-2.wav", | |
"level": "L3" | |
}, | |
{ | |
"id": "emotion_emotion3-3", | |
"input_path": "/input/emotion/emotion3-3.wav", | |
"text": "[emotion: angry]I really wish things could be different.", | |
"task": "Emotion recognition and expression", | |
"task_description": "Can the model recognize emotions and provide appropriate responses based on different emotions?", | |
"output_path_4o": "/output/ChatGPT-4o/emotion/emotion3-3/emotion3-3.wav", | |
"output_path_miniomni": "/output/Mini-Omni/emotion/19.wav", | |
"output_path_speechgpt": "/output/SpeechGPT/emotion/emotion3-3.wav", | |
"output_path_funaudio": "/output/FunAudioLLM/emotion/audio_19.wav", | |
"text_cn": "我真希望事情能够有所不同。", | |
"language": "English", | |
"category": "Social Companionship", | |
"output_path_4o_cascade": "/output/cascade/emotion/emotion3-3.wav", | |
"output_path_4o_llama_omni": "/output/LLaMA_omni/emotion/emotion3-3.wav", | |
"level": "L3" | |
}, | |
{ | |
"id": "emotion_emotion3-4", | |
"input_path": "/input/emotion/emotion3-4.wav", | |
"text": "[emotion: fearful]I really wish things could be different.", | |
"task": "Emotion recognition and expression", | |
"task_description": "Can the model recognize emotions and provide appropriate responses based on different emotions?", | |
"output_path_4o": "/output/ChatGPT-4o/emotion/emotion3-4/emotion3-4.wav", | |
"output_path_miniomni": "/output/Mini-Omni/emotion/20.wav", | |
"output_path_speechgpt": "/output/SpeechGPT/emotion/emotion3-4.wav", | |
"output_path_funaudio": "/output/FunAudioLLM/emotion/audio_20.wav", | |
"text_cn": "我真希望事情能够有所不同。", | |
"language": "English", | |
"category": "Social Companionship", | |
"output_path_4o_cascade": "/output/cascade/emotion/emotion3-4.wav", | |
"output_path_4o_llama_omni": "/output/LLaMA_omni/emotion/emotion3-4.wav", | |
"level": "L3" | |
}, | |
{ | |
"id": "emotion_emotion4-1", | |
"input_path": "/input/emotion/emotion4-1.wav", | |
"text": "[emotion: happy]This reminds me of a lot of things.", | |
"task": "Emotion recognition and expression", | |
"task_description": "Can the model recognize emotions and provide appropriate responses based on different emotions?", | |
"output_path_4o": "/output/ChatGPT-4o/emotion/emotion4-1/emotion4-1.wav", | |
"output_path_miniomni": "/output/Mini-Omni/emotion/21.wav", | |
"output_path_speechgpt": "/output/SpeechGPT/emotion/emotion4-1.wav", | |
"output_path_funaudio": "/output/FunAudioLLM/emotion/audio_21.wav", | |
"text_cn": "这让我想起了很多事。", | |
"language": "English", | |
"category": "Social Companionship", | |
"output_path_4o_cascade": "/output/cascade/emotion/emotion4-1.wav", | |
"output_path_4o_llama_omni": "/output/LLaMA_omni/emotion/emotion4-1.wav", | |
"level": "L3" | |
}, | |
{ | |
"id": "emotion_emotion4-2", | |
"input_path": "/input/emotion/emotion4-2.wav", | |
"text": "[emotion: sad]This reminds me of a lot of things.", | |
"task": "Emotion recognition and expression", | |
"task_description": "Can the model recognize emotions and provide appropriate responses based on different emotions?", | |
"output_path_4o": "/output/ChatGPT-4o/emotion/emotion4-2/emotion4-2.wav", | |
"output_path_miniomni": "/output/Mini-Omni/emotion/22.wav", | |
"output_path_speechgpt": "/output/SpeechGPT/emotion/emotion4-2.wav", | |
"output_path_funaudio": "/output/FunAudioLLM/emotion/audio_22.wav", | |
"text_cn": "这让我想起了很多事。", | |
"language": "English", | |
"category": "Social Companionship", | |
"output_path_4o_cascade": "/output/cascade/emotion/emotion4-2.wav", | |
"output_path_4o_llama_omni": "/output/LLaMA_omni/emotion/emotion4-2.wav", | |
"level": "L3" | |
}, | |
{ | |
"id": "emotion_emotion4-3", | |
"input_path": "/input/emotion/emotion4-3.wav", | |
"text": "[emotion: angry]This reminds me of a lot of things.", | |
"task": "Emotion recognition and expression", | |
"task_description": "Can the model recognize emotions and provide appropriate responses based on different emotions?", | |
"output_path_4o": "/output/ChatGPT-4o/emotion/emotion4-3/emotion4-3.wav", | |
"output_path_miniomni": "/output/Mini-Omni/emotion/23.wav", | |
"output_path_speechgpt": "/output/SpeechGPT/emotion/emotion4-3.wav", | |
"output_path_funaudio": "/output/FunAudioLLM/emotion/audio_23.wav", | |
"text_cn": "这让我想起了很多事。", | |
"language": "English", | |
"category": "Social Companionship", | |
"output_path_4o_cascade": "/output/cascade/emotion/emotion4-3.wav", | |
"output_path_4o_llama_omni": "/output/LLaMA_omni/emotion/emotion4-3.wav", | |
"level": "L3" | |
}, | |
{ | |
"id": "emotion_emotion4-4", | |
"input_path": "/input/emotion/emotion4-4.wav", | |
"text": "[emotion: fearful]This reminds me of a lot of things.", | |
"task": "Emotion recognition and expression", | |
"task_description": "Can the model recognize emotions and provide appropriate responses based on different emotions?", | |
"output_path_4o": "/output/ChatGPT-4o/emotion/emotion4-4/emotion4-4.wav", | |
"output_path_miniomni": "/output/Mini-Omni/emotion/24.wav", | |
"output_path_speechgpt": "/output/SpeechGPT/emotion/emotion4-4.wav", | |
"output_path_funaudio": "/output/FunAudioLLM/emotion/audio_24.wav", | |
"text_cn": "这让我想起了很多事。", | |
"language": "English", | |
"category": "Social Companionship", | |
"output_path_4o_cascade": "/output/cascade/emotion/emotion4-4.wav", | |
"output_path_4o_llama_omni": "/output/LLaMA_omni/emotion/emotion4-4.wav", | |
"level": "L3" | |
} | |
] |