File size: 9,747 Bytes
50ad069
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
[
    {
        "id": "tongue_twisters_audio_0",
        "input_path": "/input/tongue_twister/audio_0.mp3",
        "text": "Say the following sentence clearly: \"She sells seashells by the seashore.\"",
        "task": "Tongue twisters capabilities",
        "task_description": "Can the model correctly pronounce a given tongue twister?",
        "output_path_4o": "/output/ChatGPT-4o/tongue_twister/audio_0/audio_0.wav",
        "output_path_miniomni": "/output/Mini-Omni/tongue_twister/00.wav",
        "output_path_speechgpt": "/output/SpeechGPT/tongue_twister/answer_0.wav",
        "output_path_funaudio": "/output/FunAudioLLM/tongue_twister/audio_0.wav",
        "text_cn": "清楚地说:“她在海滨出售贝壳。”",
        "language": "English",
        "category": "Entertainment",
        "output_path_4o_cascade": "/output/cascade/tongue_twister/audio_0.wav",
        "output_path_4o_llama_omni": "/output/LLaMA_omni/tongue_twister/audio_0.wav",
        "level": "L2"
    },
    {
        "id": "tongue_twisters_audio_2",
        "input_path": "/input/tongue_twister/audio_2.mp3",
        "text": "Say this sentence clearly without any errors: \"Betty bought a bit of butter, but the butter Betty bought was bitter.\"",
        "task": "Tongue twisters capabilities",
        "task_description": "Can the model correctly pronounce a given tongue twister?",
        "output_path_4o": "/output/ChatGPT-4o/tongue_twister/audio_2/audio_2.wav",
        "output_path_miniomni": "/output/Mini-Omni/tongue_twister/02.wav",
        "output_path_speechgpt": "/output/SpeechGPT/tongue_twister/answer_2.wav",
        "output_path_funaudio": "/output/FunAudioLLM/tongue_twister/audio_2.wav",
        "text_cn": "清楚地说这句话没有任何错误:“贝蒂买了一点黄油,但贝蒂买的黄油却很痛苦。”",
        "language": "English",
        "category": "Entertainment",
        "output_path_4o_cascade": "/output/cascade/tongue_twister/audio_2.wav",
        "output_path_4o_llama_omni": "/output/LLaMA_omni/tongue_twister/audio_2.wav",
        "level": "L2"
    },
    {
        "id": "tongue_twisters_audio_3",
        "input_path": "/input/tongue_twister/audio_3.mp3",
        "text": "Please say this tongue twister carefully: The sixth sick sheik's sixth sheep's sick.",
        "task": "Tongue twisters capabilities",
        "task_description": "Can the model correctly pronounce a given tongue twister?",
        "output_path_4o": "/output/ChatGPT-4o/tongue_twister/audio_3/audio_3.wav",
        "output_path_miniomni": "/output/Mini-Omni/tongue_twister/03.wav",
        "output_path_speechgpt": "/output/SpeechGPT/tongue_twister/answer_3.wav",
        "output_path_funaudio": "/output/FunAudioLLM/tongue_twister/audio_3.wav",
        "text_cn": "请仔细地说出这种舌头的扭曲:第六个病酋长的第六只绵羊病了。",
        "language": "English",
        "category": "Entertainment",
        "output_path_4o_cascade": "/output/cascade/tongue_twister/audio_3.wav",
        "output_path_4o_llama_omni": "/output/LLaMA_omni/tongue_twister/audio_3.wav",
        "level": "L2"
    },
    {
        "id": "tongue_twisters_audio_4",
        "input_path": "/input/tongue_twister/audio_4.mp3",
        "text": "Say the following clearly and at a regular pace: \"How can a clam cram in a clean cream can?\"",
        "task": "Tongue twisters capabilities",
        "task_description": "Can the model correctly pronounce a given tongue twister?",
        "output_path_4o": "/output/ChatGPT-4o/tongue_twister/audio_4/audio_4.wav",
        "output_path_miniomni": "/output/Mini-Omni/tongue_twister/04.wav",
        "output_path_speechgpt": "/output/SpeechGPT/tongue_twister/answer_4.wav",
        "output_path_funaudio": "/output/FunAudioLLM/tongue_twister/audio_4.wav",
        "text_cn": "清楚地说出以下速度:蛤lam塞在干净的奶油罐中如何?",
        "language": "English",
        "category": "Entertainment",
        "output_path_4o_cascade": "/output/cascade/tongue_twister/audio_4.wav",
        "output_path_4o_llama_omni": "/output/LLaMA_omni/tongue_twister/audio_4.wav",
        "level": "L2"
    },
    {
        "id": "tongue_twisters_audio_7",
        "input_path": "/input/tongue_twister/audio_7.mp3",
        "text": "Say this sentence quickly without losing clarity: \"A box of mixed biscuits, a mixed biscuit box.\"",
        "task": "Tongue twisters capabilities",
        "task_description": "Can the model correctly pronounce a given tongue twister?",
        "output_path_4o": "/output/ChatGPT-4o/tongue_twister/audio_7/audio_7.wav",
        "output_path_miniomni": "/output/Mini-Omni/tongue_twister/07.wav",
        "output_path_speechgpt": "/output/SpeechGPT/tongue_twister/answer_7.wav",
        "output_path_funaudio": "/output/FunAudioLLM/tongue_twister/audio_5.wav",
        "text_cn": "迅速说出这句话而不会失去清晰度:一盒混合饼干,一个混合的饼干盒。",
        "language": "English",
        "category": "Entertainment",
        "output_path_4o_cascade": "/output/cascade/tongue_twister/audio_7.wav",
        "output_path_4o_llama_omni": "/output/LLaMA_omni/tongue_twister/audio_7.wav",
        "level": "L2"
    },
    {
        "id": "tongue_twisters_audio_9",
        "input_path": "/input/tongue_twister/audio_9.mp3",
        "text": "Say this sentence with proper intonation: \"He thrusts his fists against the posts and still insists he sees the ghosts.\"",
        "task": "Tongue twisters capabilities",
        "task_description": "Can the model correctly pronounce a given tongue twister?",
        "output_path_4o": "/output/ChatGPT-4o/tongue_twister/audio_9/audio_9.wav",
        "output_path_miniomni": "/output/Mini-Omni/tongue_twister/09.wav",
        "output_path_speechgpt": "/output/SpeechGPT/tongue_twister/answer_9.wav",
        "output_path_funaudio": "/output/FunAudioLLM/tongue_twister/audio_6.wav",
        "text_cn": "用适当的语调说出这句话:他将拳头推向哨所,仍然坚持认为他看到鬼魂。",
        "language": "English",
        "category": "Entertainment",
        "output_path_4o_cascade": "/output/cascade/tongue_twister/audio_9.wav",
        "output_path_4o_llama_omni": "/output/LLaMA_omni/tongue_twister/audio_9.wav",
        "level": "L2"
    },
    {
        "id": "tongue_twisters_audio_10",
        "input_path": "/input/tongue_twister/audio_10.mp3",
        "text": "Say this sentence with clear emphasis on alliteration: \"Fred fed Ted bread and Ted fed Fred bread.\"",
        "task": "Tongue twisters capabilities",
        "task_description": "Can the model correctly pronounce a given tongue twister?",
        "output_path_4o": "/output/ChatGPT-4o/tongue_twister/audio_10/audio_10.wav",
        "output_path_miniomni": "/output/Mini-Omni/tongue_twister/10.wav",
        "output_path_speechgpt": "/output/SpeechGPT/tongue_twister/answer_10.wav",
        "output_path_funaudio": "/output/FunAudioLLM/tongue_twister/audio_1.wav",
        "text_cn": "说出这句话,以明确的重视:弗雷德喂了塞德面包和泰德喂了弗雷德面包。",
        "language": "English",
        "category": "Entertainment",
        "output_path_4o_cascade": "/output/cascade/tongue_twister/audio_10.wav",
        "output_path_4o_llama_omni": "/output/LLaMA_omni/tongue_twister/audio_10.wav",
        "level": "L2"
    },
    {
        "id": "tongue_tongue_twister0_Neighbor_1",
        "input_path": "/input/noise/tongue_twister0_Neighbor_1.wav",
        "text": "[Add Neighbor noise]Say the following sentence clearly: \"She sells seashells by the seashore.\"",
        "noise":"Add Neighbor noise",
        "task": "Tongue twisters capabilities",
        "task_description": "Can the model correctly pronounce a given tongue twister?",
        "output_path_4o": "/output/ChatGPT-4o/noise/tongue_twister0_Neighbor_1/tongue_twister0_Neighbor_1.wav",
        "output_path_miniomni": "/output/Mini-Omni/noise/14.wav",
        "output_path_speechgpt": "/output/SpeechGPT/noise/tongue_twister0_Neighbor_1.wav",
        "output_path_funaudio": "/output/FunAudioLLM/noise/audio_14.wav",
        "text_cn": "清楚地说:“她在海滨出售贝壳。”",
        "language": "English",
        "category": "Entertainment",
        "output_path_4o_cascade": "/output/cascade/noise/tongue_twister0_Neighbor_1.wav",
        "output_path_4o_llama_omni": "/output/LLaMA_omni/noise/tongue_twister0_Neighbor_1.wav",
        "level": "L2"
    },
    {
        "id": "tongue_tongue_twister2_Neighbor_1",
        "input_path": "/input/noise/tongue_twister2_Neighbor_1.wav",
        "text": "[Add Neighbor noise]Say this sentence clearly without any errors: \"Betty bought a bit of butter, but the butter Betty bought was bitter.\"",
        "noise":"Add Neighbor noise",
        "task": "Tongue twisters capabilities",
        "task_description": "Can the model correctly pronounce a given tongue twister?",
        "output_path_4o": "/output/ChatGPT-4o/noise/tongue_twister2_Neighbor_1/tongue_twister2_Neighbor_1.wav",
        "output_path_miniomni": "/output/Mini-Omni/noise/15.wav",
        "output_path_speechgpt": "/output/SpeechGPT/noise/tongue_twister2_Neighbor_1.wav",
        "output_path_funaudio": "/output/FunAudioLLM/noise/audio_15.wav",
        "text_cn": "清楚地说这句话没有任何错误:“贝蒂买了一点黄油,但贝蒂买的黄油却很痛苦。”",
        "language": "English",
        "category": "Entertainment",
        "output_path_4o_cascade": "/output/cascade/noise/tongue_twister2_Neighbor_1.wav",
        "output_path_4o_llama_omni": "/output/LLaMA_omni/noise/tongue_twister2_Neighbor_1.wav",
        "level": "L2"
    }
]