Xinonria committed on
Commit
dd12bff
1 Parent(s): edf73e3

修复其他语言默认无法重定向到英语的错误

Browse files
Files changed (2) hide show
  1. app.py +613 -611
  2. i18n/i18n.py +4 -1
app.py CHANGED
@@ -1,611 +1,613 @@
1
- import time
2
- import os
3
- import logging
4
-
5
- import gradio as gr
6
- import numpy as np
7
- import pandas as pd
8
- from pypinyin import lazy_pinyin
9
- from i18n import gettext, Translate
10
-
11
- from api import generate_api, get_audio, generate_voice, load_characters_csv
12
- from utils import get_length
13
-
14
- # 翻译文件位置
15
- trans_file = os.path.join(os.path.dirname(__file__), "i18n", "translations.json")
16
-
17
- # 关闭aiohttp的DEBUG日志
18
- logging.getLogger("aiohttp").setLevel(logging.WARNING)
19
- # logging.getLogger("gradio").setLevel(logging.WARNING)
20
-
21
- # 带有时间的log
22
- logging.basicConfig(
23
- level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
24
- )
25
-
26
-
27
- header = """header"""
28
-
29
- terms = "terms"
30
-
31
-
32
- def update_all_characters(lang, category):
33
- new_characters, category = load_characters_csv(lang)
34
- initial_characters = get_characters(kind=category[0], all_characters=new_characters)
35
- return (
36
- new_characters,
37
- initial_characters,
38
- gr.Gallery(
39
- value=[[char["头像"], char["名称"]] for char in initial_characters],
40
- show_label=False,
41
- elem_id="character_gallery",
42
- columns=[11],
43
- object_fit="contain",
44
- height="auto",
45
- interactive=False,
46
- allow_preview=False,
47
- selected_index=None,
48
- ),
49
- category,
50
- gr.update(choices=category, value=category[0])
51
- )
52
-
53
-
54
- def get_characters(
55
- query=None, page=1, per_page=400, kind="原神", lang="zh", all_characters=None
56
- ):
57
- # 使用传入的 all_characters 参数
58
- filtered_characters = all_characters[all_characters["类别"] == kind]
59
-
60
- if query:
61
- # 使用拼音和汉字进行搜索
62
- filtered_characters = filtered_characters[
63
- filtered_characters["名称"].str.contains(query, case=False)
64
- ]
65
- if filtered_characters.empty and lang == "zh":
66
- filtered_characters = all_characters[all_characters["类别"] == kind]
67
- filtered_characters = filtered_characters[
68
- filtered_characters["名称"]
69
- .apply(lambda x: "".join(lazy_pinyin(x)))
70
- .str.contains(query, case=False)
71
- ]
72
-
73
- # 按名称分组,并选择每组的第一个记录
74
- unique_characters = (
75
- filtered_characters.groupby("名称").first().reset_index().sort_values(by="id")
76
- )
77
-
78
- # 处理头像数据
79
- import pickle
80
-
81
- def process_avatar(avatar):
82
- if not isinstance(avatar, str):
83
- try:
84
- return pickle.loads(bytes(avatar))
85
- except:
86
- return avatar
87
- return avatar
88
-
89
- unique_characters['头像'] = unique_characters['头像'].apply(process_avatar)
90
-
91
- # 应用分页
92
- start_index = (page - 1) * per_page
93
- end_index = start_index + per_page
94
-
95
- return unique_characters.iloc[start_index:end_index].to_dict("records")
96
-
97
-
98
- async def generate(selected_character=None, selected_characters=[], text="", lang="zh"):
99
- if selected_character:
100
- characters = [selected_character] + selected_characters
101
- else:
102
- characters = selected_characters
103
- if not selected_character and not selected_characters:
104
- if lang == "zh":
105
- raise gr.Error("请先选择一个角色")
106
- elif lang == "en":
107
- raise gr.Error("Please select a character first")
108
- elif lang == "ja":
109
- raise gr.Error("まず、キャラクターを選択してください")
110
- elif lang == "ko":
111
- raise gr.Error("먼저 캐릭터를 선택하세요")
112
- voice_ids = [char.get("voice_id") for char in characters if char.get("voice_id")]
113
-
114
- if not voice_ids:
115
- raise gr.Error("所选角色没有关联的 voice_id")
116
-
117
- start_time = time.time()
118
- # 假设我们只使用第一个选择的角色的名称
119
- if voice_ids == "1":
120
- if lang == "zh":
121
- raise gr.Error("该角色暂未创建语音")
122
- elif lang == "en":
123
- raise gr.Error("The character has not been created yet")
124
- elif lang == "ja":
125
- raise gr.Error("そのキャラクターの音声はまだ作成されていません")
126
- elif lang == "ko":
127
- raise gr.Error("해당 캐릭터의 음성이 아직 생성되지 않았습니다")
128
-
129
- if text == "":
130
- if lang == "zh":
131
- raise gr.Error("请输入需要合成的文本")
132
- elif lang == "en":
133
- raise gr.Error("Please enter the text to be synthesized")
134
- elif lang == "ja":
135
- raise gr.Error("合成するテキストを入力してください")
136
- elif lang == "ko":
137
- raise gr.Error("합성할 텍스트를 입력하세요")
138
-
139
- if get_length(text) > 1024:
140
- if lang == "zh":
141
- raise gr.Error("长度请控制在1024个字符以内")
142
- elif lang == "en":
143
- raise gr.Error("The text length exceeds 1024 words")
144
- elif lang == "ja":
145
- raise gr.Error("テキストの長さが1024文字を超えています")
146
- elif lang == "ko":
147
- raise gr.Error("텍스트 길이가 1024자를 초과합니다")
148
-
149
- audio = await generate_api(voice_ids, text)
150
- end_time = time.time()
151
- if lang == "zh":
152
- cost_time = f"合成共花费{end_time - start_time:.2f}秒"
153
- elif lang == "en":
154
- cost_time = (
155
- f"Total time spent synthesizing: {end_time - start_time:.2f} seconds"
156
- )
157
- elif lang == "ja":
158
- cost_time = f"合成にかかった時間: {end_time - start_time:.2f}秒"
159
- elif lang == "ko":
160
- cost_time = f"합성에 소요된 시간: {end_time - start_time:.2f}초"
161
- if isinstance(audio, str):
162
- print(audio)
163
- raise gr.Error(audio)
164
- else:
165
- return audio, cost_time
166
-
167
-
168
- def get_character_emotions(character, all_characters):
169
- # 从all_characters中筛选出与当前角色名称相同的所有记录
170
- character_records = all_characters[all_characters["名称"] == character["名称"]]
171
-
172
- # 按情绪去重并获取完整的角色信息
173
- character_infos = character_records.drop_duplicates(subset=["情绪"]).to_dict(
174
- "records"
175
- )
176
-
177
- # 如果没有找到角色信息,返回一个包含默认值的字典
178
- return (
179
- character_infos
180
- if character_infos
181
- else [{"名称": character["名称"], "情绪": "默认情绪"}]
182
- )
183
-
184
-
185
- def update_character_info(character_name, emotion, current_character, all_characters):
186
- character_info = None
187
- if character_name and emotion:
188
- character_info = all_characters[
189
- (all_characters["名称"] == character_name)
190
- & (all_characters["情绪"] == emotion)
191
- ]
192
- if character_name == "":
193
- return None
194
- character_info = character_info.iloc[0].to_dict()
195
- return character_info, all_characters
196
-
197
-
198
- def add_new_voice(current_character, selected_characters, kind, lang, all_characters):
199
- if not current_character:
200
- if lang == "zh":
201
- raise gr.Error("请先选择一个角色")
202
- elif lang == "en":
203
- raise gr.Error("Please select a character first")
204
- elif lang == "ja":
205
- raise gr.Error("まず、キャラクターを選択してください")
206
- elif lang == "ko":
207
- raise gr.Error("먼저 캐릭터를 선택하세요")
208
-
209
- if len(selected_characters) >= 5:
210
- raise gr.Error("已达到最大选择数(5个)")
211
-
212
- # 检查是否已存在相同角色
213
- existing_char = next(
214
- (
215
- char
216
- for char in selected_characters
217
- if char["名称"] == current_character["名称"]
218
- ),
219
- None,
220
- )
221
- if existing_char:
222
- # 如果情绪不同,更新情绪
223
- if existing_char["情绪"] != current_character["情绪"]:
224
- existing_char["情绪"] = current_character["情绪"]
225
- else:
226
- selected_characters.insert(0, current_character)
227
-
228
- updated_characters = get_characters(
229
- kind=kind, lang=lang, all_characters=all_characters
230
- )
231
- # ! 取消gallery选中状态,返回个新的gallery是必要的,否则会保留上一次的选中状态。这里sonnet很喜欢改成返回一个数组,但这不能清空gallery的选中状态
232
- updated_gallery = gr.Gallery(
233
- value=[[char["头像"], char["名称"]] for char in updated_characters],
234
- show_label=False,
235
- elem_id="character_gallery",
236
- columns=[11],
237
- object_fit="contain",
238
- height="auto",
239
- interactive=False,
240
- allow_preview=False,
241
- selected_index=None,
242
- )
243
-
244
- return (
245
- None,
246
- gr.update(value=""),
247
- gr.update(choices=[]),
248
- selected_characters,
249
- updated_characters,
250
- updated_gallery,
251
- gr.update(visible=True),
252
- all_characters,
253
- )
254
-
255
-
256
- def update_selected_chars_display(selected_characters):
257
- updates = []
258
- for i, (name, emotion, _, row) in enumerate(selected_chars_rows):
259
- if i < len(selected_characters):
260
- char = selected_characters[i]
261
- updates.extend(
262
- [
263
- gr.update(value=char["名称"], visible=True),
264
- gr.update(value=char["情绪"], visible=True),
265
- gr.update(visible=True),
266
- gr.update(visible=True),
267
- ]
268
- )
269
- else:
270
- updates.extend(
271
- [
272
- gr.update(value="", visible=False),
273
- gr.update(value="", visible=False),
274
- gr.update(visible=False),
275
- gr.update(visible=False),
276
- ]
277
- )
278
- return updates
279
-
280
-
281
- def remove_character(index, selected_characters):
282
- if 0 <= index < len(selected_characters):
283
- del selected_characters[index]
284
- return selected_characters, gr.update(visible=True)
285
-
286
-
287
- def update_gallery(kind, query, all_characters):
288
- updated_characters = get_characters(
289
- kind=kind, query=query, lang=lang, all_characters=all_characters
290
- )
291
- return (
292
- updated_characters,
293
- [[char["头像"], char["名称"]] for char in updated_characters],
294
- all_characters,
295
- )
296
-
297
-
298
- def on_select(evt: gr.SelectData, characters, selected_characters, all_characters):
299
- # 如果没有选择角色,换人的时候清空
300
- if len(selected_characters) == 0:
301
- selected_characters = []
302
-
303
- selected = characters[evt.index]
304
- emotions = get_character_emotions(selected, all_characters)
305
- normal_index = 0
306
- for index, emotion in enumerate(emotions):
307
- if (
308
- emotion["情绪"] == "正常"
309
- or emotion["情绪"] == "보통"
310
- or emotion["情绪"] == "normal"
311
- ):
312
- normal_index = index
313
- break
314
-
315
- default_emotion = emotions[normal_index]["情绪"] if emotions else ""
316
- default_voice_id = emotions[normal_index]["voice_id"] if emotions else ""
317
-
318
- character_dict = selected.copy()
319
- character_dict["情绪"] = default_emotion
320
- character_dict["voice_id"] = default_voice_id
321
- return (
322
- selected["名称"],
323
- gr.Dropdown(
324
- choices=[emotion["情绪"] for emotion in emotions], value=default_emotion
325
- ),
326
- character_dict,
327
- selected_characters,
328
- )
329
-
330
-
331
- async def update_prompt_audio(current_character):
332
- if current_character:
333
- return await get_audio(current_character.get("voice_id"))
334
- else:
335
- return None
336
-
337
- async def create_voice(avatar, name, emotion, tags, gender, audio_data, lang):
338
- updates = {}
339
- for field, value in [("avatar", avatar), ("name", name), ("emotion", emotion), ("tags", tags), ("gender", gender), ("audio_data", audio_data)]:
340
- if field in ["avatar", "audio_data"]:
341
- if value is None or (isinstance(value, np.ndarray) and value.size == 0):
342
- updates[field] = gr.update(value=None)
343
- elif value == "":
344
- updates[field] = gr.update(value="")
345
-
346
- if updates:
347
- if lang == "zh":
348
- gr.Warning("请填写完整信息")
349
- elif lang == "en":
350
- gr.Warning("Please fill in all the information")
351
- elif lang == "ja":
352
- gr.Warning("すべての情報を入力してください")
353
- elif lang == "ko":
354
- gr.Warning("모든 정보를 입력하세요")
355
- return tuple(updates.get(field, gr.update()) for field in ["avatar", "name", "emotion", "tags", "gender", "audio_data"])
356
- duration = len(audio_data[1]) / audio_data[0]
357
- if duration < 3.2 or duration > 8:
358
- if lang == "zh":
359
- gr.Warning("音频时长请控制在3.2-8秒之间")
360
- elif lang == "en":
361
- gr.Warning("The audio duration should be between 3.2 and 8 seconds")
362
- elif lang == "ja":
363
- gr.Warning("音声の長さは3.2秒から8秒の間にしてください")
364
- elif lang == "ko":
365
- gr.Warning("음성 길이는 3.2초에서 8초 사이로 설정해야 합니다")
366
- return avatar, name, emotion, tags, gender, audio_data
367
- await generate_voice(avatar, name, emotion, tags, gender, audio_data, lang)
368
- if lang == "zh":
369
- gr.Info("创建成功,您创建的语音将在审核后上线")
370
- elif lang == "en":
371
- gr.Info("Creation successful. The voice you created will be available after review.")
372
- elif lang == "ja":
373
- gr.Info("作成が完了しました。作成された音声は審査後に公開されます。")
374
- elif lang == "ko":
375
- gr.Info("생성 완료. 귀하가 생성한 음성은 검토 후 공개될 예정입니다.")
376
- return avatar, name, emotion, tags, gender, audio_data
377
-
378
- head = """
379
- <title>Free Online Text to Speech (TTS) | Convert Text to Audio</title>
380
- <meta name="description" content="Text to Speech(TTS) for free! 5-second voice cloning, no sign-up required.">
381
- <meta name="keywords" content="text to speech, TTS, free TTS, online TTS, speech synthesis, voice generator">
382
- """
383
- with gr.Blocks(title="Online Free TTS", theme=gr.themes.Soft(), head=head) as demo:
384
- gr.Markdown(
385
- "Online Free TTS(Text-to-Speech). Ultra-low latency, 5-second voice cloning."
386
- )
387
- lang = gr.Radio(
388
- choices=[("中文", "zh"), ("English", "en"), ("日本語", "ja"), ("한국인", "ko")],
389
- label=gettext("Language"),
390
- value="en",
391
- scale=1,
392
- )
393
- all_characters_state = gr.State(load_characters_csv("en")[0])
394
- category = gr.State(load_characters_csv("en")[1])
395
-
396
- with Translate(trans_file, lang, placeholder_langs=["en", "zh", "ja", "ko"]):
397
- gr.Markdown(value=gettext(header))
398
- with gr.Group():
399
- initial_characters = get_characters(
400
- kind="原神", lang="zh", all_characters=all_characters_state.value
401
- )
402
- characters = gr.State(initial_characters)
403
- selected_characters = gr.State([])
404
- current_character = gr.State(None)
405
-
406
- with gr.Tab(gettext("Synthesis Voice")):
407
- with gr.Blocks():
408
- with gr.Row():
409
- kind = gr.Dropdown(
410
- choices=category.value,
411
- value="原神",
412
- label=gettext("Select character category"),
413
- )
414
- query = gr.Textbox(
415
- label=gettext("Search character"),
416
- value="",
417
- lines=1,
418
- max_lines=1,
419
- interactive=True,
420
- )
421
- with gr.Blocks():
422
- gallery = gr.Gallery(
423
- value=[
424
- [char["头像"], char["名称"]] for char in characters.value
425
- ],
426
- show_label=False,
427
- elem_id="character_gallery",
428
- columns=[11],
429
- object_fit="contain",
430
- height="auto",
431
- interactive=False,
432
- allow_preview=False,
433
- selected_index=None,
434
- )
435
- with gr.Row():
436
- character_name = gr.Textbox(
437
- label=gettext("Currently selected character"),
438
- interactive=False,
439
- max_lines=1,
440
- )
441
- info_type = gr.Dropdown(choices=[], label=gettext("Select emotion"))
442
- with gr.Row():
443
- add_voice_button = gr.Button(
444
- gettext("Add new voice"), variant="primary"
445
- )
446
-
447
- selected_chars_container = gr.Column(
448
- elem_id="selected_chars_container", visible=False
449
- )
450
-
451
- with selected_chars_container:
452
- gr.Markdown(gettext("### Selected characters"))
453
- selected_chars_rows = []
454
- for i in range(5): # 假设最多选择5个角色
455
- with gr.Row() as row:
456
- name = gr.Textbox(
457
- label=gettext("Name"), interactive=False, max_lines=1
458
- )
459
- emotion = gr.Textbox(
460
- label=gettext("Emotion"), interactive=False, max_lines=1
461
- )
462
- delete_btn = gr.Button(gettext("Delete"), scale=0)
463
- selected_chars_rows.append((name, emotion, delete_btn, row))
464
-
465
- with gr.Row():
466
- with gr.Column():
467
- text = gr.Textbox(
468
- label=gettext("Text to synthesize"),
469
- value="",
470
- lines=10,
471
- max_lines=10,
472
- )
473
- inference_button = gr.Button(
474
- gettext("🎉 Synthesize Voice 🎉"), variant="primary", size="lg"
475
- )
476
- with gr.Column():
477
- prompt_audio = gr.Audio(
478
- label=gettext("Reference audio for synthesis"),
479
- interactive=False,
480
- type="numpy",
481
- )
482
- output = gr.Audio(
483
- label=gettext("Output audio"), interactive=False, type="numpy"
484
- )
485
- cost_time = gr.Textbox(
486
- label=gettext("Synthesis time"),
487
- interactive=False,
488
- show_label=False,
489
- max_lines=1,
490
- )
491
- try:
492
- inference_button.click(
493
- fn=generate,
494
- inputs=[current_character, selected_characters, text, lang],
495
- outputs=[output, cost_time],
496
- )
497
- except gr.Error as e:
498
- gr.Error(e)
499
- except Exception as e:
500
- pass
501
-
502
- with gr.Tab(gettext("Create Voice")):
503
- with gr.Row():
504
- avatar = gr.Image(label=gettext("Avatar"), interactive=True, type="pil", image_mode="RGBA")
505
- with gr.Column():
506
- with gr.Row():
507
- name = gr.Textbox(
508
- label=gettext("Name"), interactive=True, max_lines=1
509
- )
510
- emotion = gr.Textbox(
511
- label=gettext("Emotion\n(Happy, Sad, Angry)"), interactive=True, max_lines=1
512
- )
513
- tags = gr.Textbox(
514
- label=gettext("Tags\n(Genshin, Cute, Girl, Boy, etc.)"), interactive=True, max_lines=1
515
- )
516
- gender = gr.Dropdown(
517
- label=gettext("Gender"),
518
- choices=[
519
- (gettext("Male"), "male"),
520
- (gettext("Female"), "female"),
521
- (gettext("Non-Binary"), "non-binary"),
522
- ],
523
- interactive=True,
524
- )
525
- audio_data = gr.Audio(label=gettext("Prompt Audio(min 3.2s, max 8s)"), interactive=True)
526
- create_button = gr.Button(gettext("Create Voice"), variant="primary")
527
-
528
- gr.Markdown(gettext(terms))
529
- # -------------- 绑定事件 --------------
530
-
531
- lang.change(
532
- fn=update_all_characters,
533
- inputs=[lang, category],
534
- outputs=[all_characters_state, characters, gallery, category, kind],
535
- )
536
-
537
- add_voice_button.click(
538
- fn=add_new_voice,
539
- inputs=[
540
- current_character,
541
- selected_characters,
542
- kind,
543
- lang,
544
- all_characters_state,
545
- ],
546
- outputs=[
547
- current_character,
548
- character_name,
549
- info_type,
550
- selected_characters,
551
- characters,
552
- gallery,
553
- selected_chars_container,
554
- all_characters_state,
555
- ],
556
- ).then(
557
- fn=update_selected_chars_display,
558
- inputs=[selected_characters],
559
- outputs=[item for row in selected_chars_rows for item in row],
560
- )
561
-
562
- gallery.select(
563
- fn=on_select,
564
- inputs=[characters, selected_characters, all_characters_state],
565
- outputs=[character_name, info_type, current_character, selected_characters],
566
- ).then(
567
- fn=update_prompt_audio, inputs=[current_character], outputs=[prompt_audio]
568
- )
569
-
570
- info_type.change(
571
- fn=update_character_info,
572
- inputs=[character_name, info_type, current_character, all_characters_state],
573
- outputs=[current_character, all_characters_state],
574
- ).then(
575
- fn=update_prompt_audio, inputs=[current_character], outputs=[prompt_audio]
576
- )
577
-
578
- for i, (_, _, delete_btn, _) in enumerate(selected_chars_rows):
579
- delete_btn.click(
580
- fn=remove_character,
581
- inputs=[gr.Number(value=i, visible=False), selected_characters],
582
- outputs=[selected_characters, selected_chars_container],
583
- ).then(
584
- fn=update_selected_chars_display,
585
- inputs=[selected_characters],
586
- outputs=[item for row in selected_chars_rows for item in row],
587
- )
588
-
589
- kind.change(
590
- fn=update_gallery,
591
- inputs=[kind, query, all_characters_state],
592
- outputs=[characters, gallery, all_characters_state],
593
- )
594
-
595
- query.change(
596
- fn=update_gallery,
597
- inputs=[kind, query, all_characters_state],
598
- outputs=[characters, gallery, all_characters_state],
599
- )
600
-
601
- create_button.click(
602
- fn=create_voice,
603
- inputs=[avatar, name, emotion, tags, gender, audio_data, lang],
604
- outputs=[avatar, name, emotion, tags, gender, audio_data],
605
- )
606
-
607
-
608
- if __name__ == "__main__":
609
- demo.queue(default_concurrency_limit=None).launch(
610
- show_api=False
611
- )
 
 
 
1
+ import time
2
+ import os
3
+ import logging
4
+
5
+ import gradio as gr
6
+ import numpy as np
7
+ import pandas as pd
8
+ from pypinyin import lazy_pinyin
9
+ from i18n import gettext, Translate
10
+
11
+ from api import generate_api, get_audio, generate_voice, load_characters_csv
12
+ from utils import get_length
13
+
14
+ # 翻译文件位置
15
+ trans_file = os.path.join(os.path.dirname(__file__), "i18n", "translations.json")
16
+
17
+ # 关闭aiohttp的DEBUG日志
18
+ logging.getLogger("aiohttp").setLevel(logging.WARNING)
19
+ # logging.getLogger("gradio").setLevel(logging.WARNING)
20
+
21
+ # 带有时间的log
22
+ logging.basicConfig(
23
+ level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
24
+ )
25
+
26
+
27
+ header = """header"""
28
+
29
+ terms = "terms"
30
+
31
+
32
+ def update_all_characters(lang, category):
33
+ new_characters, category = load_characters_csv(lang)
34
+ initial_characters = get_characters(kind=category[0], all_characters=new_characters)
35
+ return (
36
+ new_characters,
37
+ initial_characters,
38
+ gr.Gallery(
39
+ value=[[char["头像"], char["名称"]] for char in initial_characters],
40
+ show_label=False,
41
+ elem_id="character_gallery",
42
+ columns=[11],
43
+ object_fit="contain",
44
+ height="auto",
45
+ interactive=False,
46
+ allow_preview=False,
47
+ selected_index=None,
48
+ ),
49
+ category,
50
+ gr.update(choices=category, value=category[0])
51
+ )
52
+
53
+
54
+ def get_characters(
55
+ query=None, page=1, per_page=400, kind="原神", lang="zh", all_characters=None
56
+ ):
57
+ # 使用传入的 all_characters 参数
58
+ filtered_characters = all_characters[all_characters["类别"] == kind]
59
+
60
+ if query:
61
+ # 使用拼音和汉字进行搜索
62
+ filtered_characters = filtered_characters[
63
+ filtered_characters["名称"].str.contains(query, case=False)
64
+ ]
65
+ if filtered_characters.empty and lang == "zh":
66
+ filtered_characters = all_characters[all_characters["类别"] == kind]
67
+ filtered_characters = filtered_characters[
68
+ filtered_characters["名称"]
69
+ .apply(lambda x: "".join(lazy_pinyin(x)))
70
+ .str.contains(query, case=False)
71
+ ]
72
+
73
+ # 按名称分组,并选择每组的第一个记录
74
+ unique_characters = (
75
+ filtered_characters.groupby("名称").first().reset_index().sort_values(by="id")
76
+ )
77
+
78
+ # 处理头像数据
79
+ import pickle
80
+
81
+ def process_avatar(avatar):
82
+ if not isinstance(avatar, str):
83
+ try:
84
+ return pickle.loads(bytes(avatar))
85
+ except:
86
+ return avatar
87
+ return avatar
88
+
89
+ unique_characters['头像'] = unique_characters['头像'].apply(process_avatar)
90
+
91
+ # 应用分页
92
+ start_index = (page - 1) * per_page
93
+ end_index = start_index + per_page
94
+
95
+ return unique_characters.iloc[start_index:end_index].to_dict("records")
96
+
97
+
98
+ async def generate(selected_character=None, selected_characters=[], text="", lang="zh"):
99
+ if selected_character:
100
+ characters = [selected_character] + selected_characters
101
+ else:
102
+ characters = selected_characters
103
+ if not selected_character and not selected_characters:
104
+ if lang == "zh":
105
+ raise gr.Error("请先选择一个角色")
106
+ elif lang == "en":
107
+ raise gr.Error("Please select a character first")
108
+ elif lang == "ja":
109
+ raise gr.Error("まず、キャラクターを選択してください")
110
+ elif lang == "ko":
111
+ raise gr.Error("먼저 캐릭터를 선택하세요")
112
+ voice_ids = [char.get("voice_id") for char in characters if char.get("voice_id")]
113
+
114
+ if not voice_ids:
115
+ raise gr.Error("所选角色没有关联的 voice_id")
116
+
117
+ start_time = time.time()
118
+ # 假设我们只使用第一个选择的角色的名称
119
+ if voice_ids == "1":
120
+ if lang == "zh":
121
+ raise gr.Error("该角色暂未创建语音")
122
+ elif lang == "en":
123
+ raise gr.Error("The character has not been created yet")
124
+ elif lang == "ja":
125
+ raise gr.Error("そのキャラクターの音声はまだ作成されていません")
126
+ elif lang == "ko":
127
+ raise gr.Error("해당 캐릭터의 음성이 아직 생성되지 않았습니다")
128
+
129
+ if text == "":
130
+ if lang == "zh":
131
+ raise gr.Error("请输入需要合成的文本")
132
+ elif lang == "en":
133
+ raise gr.Error("Please enter the text to be synthesized")
134
+ elif lang == "ja":
135
+ raise gr.Error("合成するテキストを入力してください")
136
+ elif lang == "ko":
137
+ raise gr.Error("합성할 텍스트를 입력하세요")
138
+
139
+ if get_length(text) > 1024:
140
+ if lang == "zh":
141
+ raise gr.Error("长度请控制在1024个字符以内")
142
+ elif lang == "en":
143
+ raise gr.Error("The text length exceeds 1024 words")
144
+ elif lang == "ja":
145
+ raise gr.Error("テキストの長さが1024文字を超えています")
146
+ elif lang == "ko":
147
+ raise gr.Error("텍스트 길이가 1024자를 초과합니다")
148
+
149
+ audio = await generate_api(voice_ids, text)
150
+ end_time = time.time()
151
+ if lang == "zh":
152
+ cost_time = f"合成共花费{end_time - start_time:.2f}秒"
153
+ elif lang == "en":
154
+ cost_time = (
155
+ f"Total time spent synthesizing: {end_time - start_time:.2f} seconds"
156
+ )
157
+ elif lang == "ja":
158
+ cost_time = f"合成にかかった時間: {end_time - start_time:.2f}秒"
159
+ elif lang == "ko":
160
+ cost_time = f"합성에 소요된 시간: {end_time - start_time:.2f}초"
161
+ if isinstance(audio, str):
162
+ print(audio)
163
+ raise gr.Error(audio)
164
+ else:
165
+ return audio, cost_time
166
+
167
+
168
+ def get_character_emotions(character, all_characters):
169
+ # 从all_characters中筛选出与当前角色名称相同的所有记录
170
+ character_records = all_characters[all_characters["名称"] == character["名称"]]
171
+
172
+ # 按情绪去重并获取完整的角色信息
173
+ character_infos = character_records.drop_duplicates(subset=["情绪"]).to_dict(
174
+ "records"
175
+ )
176
+
177
+ # 如果没有找到角色信息,返回一个包含默认值的字典
178
+ return (
179
+ character_infos
180
+ if character_infos
181
+ else [{"名称": character["名称"], "情绪": "默认情绪"}]
182
+ )
183
+
184
+
185
+ def update_character_info(character_name, emotion, current_character, all_characters):
186
+ character_info = None
187
+ if character_name and emotion:
188
+ character_info = all_characters[
189
+ (all_characters["名称"] == character_name)
190
+ & (all_characters["情绪"] == emotion)
191
+ ]
192
+ if character_name == "":
193
+ return None
194
+ character_info = character_info.iloc[0].to_dict()
195
+ return character_info, all_characters
196
+
197
+
198
+ def add_new_voice(current_character, selected_characters, kind, lang, all_characters):
199
+ if not current_character:
200
+ if lang == "zh":
201
+ raise gr.Error("请先选择一个角色")
202
+ elif lang == "en":
203
+ raise gr.Error("Please select a character first")
204
+ elif lang == "ja":
205
+ raise gr.Error("まず、キャラクターを選択してください")
206
+ elif lang == "ko":
207
+ raise gr.Error("먼저 캐릭터를 선택하세요")
208
+
209
+ if len(selected_characters) >= 5:
210
+ raise gr.Error("已达到最大选择数(5个)")
211
+
212
+ # 检查是否已存在相同角色
213
+ existing_char = next(
214
+ (
215
+ char
216
+ for char in selected_characters
217
+ if char["名称"] == current_character["名称"]
218
+ ),
219
+ None,
220
+ )
221
+ if existing_char:
222
+ # 如果情绪不同,更新情绪
223
+ if existing_char["情绪"] != current_character["情绪"]:
224
+ existing_char["情绪"] = current_character["情绪"]
225
+ else:
226
+ selected_characters.insert(0, current_character)
227
+
228
+ updated_characters = get_characters(
229
+ kind=kind, lang=lang, all_characters=all_characters
230
+ )
231
+ # ! 取消gallery选中状态,返回个新的gallery是必要的,否则会保留上一次的选中状态。这里sonnet很喜欢改成返回一个数组,但这不能清空gallery的选中状态
232
+ updated_gallery = gr.Gallery(
233
+ value=[[char["头像"], char["名称"]] for char in updated_characters],
234
+ show_label=False,
235
+ elem_id="character_gallery",
236
+ columns=[11],
237
+ object_fit="contain",
238
+ height="auto",
239
+ interactive=False,
240
+ allow_preview=False,
241
+ selected_index=None,
242
+ )
243
+
244
+ return (
245
+ None,
246
+ gr.update(value=""),
247
+ gr.update(choices=[]),
248
+ selected_characters,
249
+ updated_characters,
250
+ updated_gallery,
251
+ gr.update(visible=True),
252
+ all_characters,
253
+ )
254
+
255
+
256
+ def update_selected_chars_display(selected_characters):
257
+ updates = []
258
+ for i, (name, emotion, _, row) in enumerate(selected_chars_rows):
259
+ if i < len(selected_characters):
260
+ char = selected_characters[i]
261
+ updates.extend(
262
+ [
263
+ gr.update(value=char["名称"], visible=True),
264
+ gr.update(value=char["情绪"], visible=True),
265
+ gr.update(visible=True),
266
+ gr.update(visible=True),
267
+ ]
268
+ )
269
+ else:
270
+ updates.extend(
271
+ [
272
+ gr.update(value="", visible=False),
273
+ gr.update(value="", visible=False),
274
+ gr.update(visible=False),
275
+ gr.update(visible=False),
276
+ ]
277
+ )
278
+ return updates
279
+
280
+
281
+ def remove_character(index, selected_characters):
282
+ if 0 <= index < len(selected_characters):
283
+ del selected_characters[index]
284
+ return selected_characters, gr.update(visible=True)
285
+
286
+
287
+ def update_gallery(kind, query, all_characters):
288
+ updated_characters = get_characters(
289
+ kind=kind, query=query, lang=lang, all_characters=all_characters
290
+ )
291
+ return (
292
+ updated_characters,
293
+ [[char["头像"], char["名称"]] for char in updated_characters],
294
+ all_characters,
295
+ )
296
+
297
+
298
+ def on_select(evt: gr.SelectData, characters, selected_characters, all_characters):
299
+ # 如果没有选择角色,换人的时候清空
300
+ if len(selected_characters) == 0:
301
+ selected_characters = []
302
+
303
+ selected = characters[evt.index]
304
+ emotions = get_character_emotions(selected, all_characters)
305
+ normal_index = 0
306
+ for index, emotion in enumerate(emotions):
307
+ if (
308
+ emotion["情绪"] == "正常"
309
+ or emotion["情绪"] == "보통"
310
+ or emotion["情绪"] == "normal"
311
+ ):
312
+ normal_index = index
313
+ break
314
+
315
+ default_emotion = emotions[normal_index]["情绪"] if emotions else ""
316
+ default_voice_id = emotions[normal_index]["voice_id"] if emotions else ""
317
+
318
+ character_dict = selected.copy()
319
+ character_dict["情绪"] = default_emotion
320
+ character_dict["voice_id"] = default_voice_id
321
+ return (
322
+ selected["名称"],
323
+ gr.Dropdown(
324
+ choices=[emotion["情绪"] for emotion in emotions], value=default_emotion
325
+ ),
326
+ character_dict,
327
+ selected_characters,
328
+ )
329
+
330
+
331
+ async def update_prompt_audio(current_character):
332
+ if current_character:
333
+ return await get_audio(current_character.get("voice_id"))
334
+ else:
335
+ return None
336
+
337
+ async def create_voice(avatar, name, emotion, tags, gender, audio_data, lang):
338
+ updates = {}
339
+ for field, value in [("avatar", avatar), ("name", name), ("emotion", emotion), ("tags", tags), ("gender", gender), ("audio_data", audio_data)]:
340
+ if field in ["avatar", "audio_data"]:
341
+ if value is None or (isinstance(value, np.ndarray) and value.size == 0):
342
+ updates[field] = gr.update(value=None)
343
+ elif value == "":
344
+ updates[field] = gr.update(value="")
345
+
346
+ if updates:
347
+ if lang == "zh":
348
+ gr.Warning("请填写完整信息")
349
+ elif lang == "en":
350
+ gr.Warning("Please fill in all the information")
351
+ elif lang == "ja":
352
+ gr.Warning("すべての情報を入力してください")
353
+ elif lang == "ko":
354
+ gr.Warning("모든 정보를 입력하세요")
355
+ return tuple(updates.get(field, gr.update()) for field in ["avatar", "name", "emotion", "tags", "gender", "audio_data"])
356
+ duration = len(audio_data[1]) / audio_data[0]
357
+ if duration < 3.2 or duration > 8:
358
+ if lang == "zh":
359
+ gr.Warning("音频时长请控制在3.2-8秒之间")
360
+ elif lang == "en":
361
+ gr.Warning("The audio duration should be between 3.2 and 8 seconds")
362
+ elif lang == "ja":
363
+ gr.Warning("音声の長さは3.2秒から8秒の間にしてください")
364
+ elif lang == "ko":
365
+ gr.Warning("음성 길이는 3.2초에서 8초 사이로 설정해야 합니다")
366
+ return avatar, name, emotion, tags, gender, audio_data
367
+ await generate_voice(avatar, name, emotion, tags, gender, audio_data, lang)
368
+ if lang == "zh":
369
+ gr.Info("创建成功,您创建的语音将在审核后上线")
370
+ elif lang == "en":
371
+ gr.Info("Creation successful. The voice you created will be available after review.")
372
+ elif lang == "ja":
373
+ gr.Info("作成が完了しました。作成された音声は審査後に公開されます。")
374
+ elif lang == "ko":
375
+ gr.Info("생성 완료. 귀하가 생성한 음성은 검토 후 공개될 예정입니다.")
376
+ return avatar, name, emotion, tags, gender, audio_data
377
+
378
+ head = """
379
+ <title>Free Online Text to Speech (TTS) | Convert Text to Audio</title>
380
+ <meta name="description" content="Text to Speech(TTS) for free! 5-second voice cloning, no sign-up required.">
381
+ <meta name="keywords" content="text to speech, TTS, free TTS, online TTS, speech synthesis, voice generator">
382
+ """
383
+ with gr.Blocks(title="Online Free TTS", theme=gr.themes.Soft(), head=head) as demo:
384
+ gr.Markdown(
385
+ "Online Free TTS(Text-to-Speech). Ultra-low latency, 5-second voice cloning."
386
+ )
387
+ lang = gr.Radio(
388
+ choices=[("中文", "zh"), ("English", "en"), ("日本語", "ja"), ("한국인", "ko")],
389
+ label=gettext("Language"),
390
+ value="en",
391
+ scale=1,
392
+ )
393
+ all_characters_state = gr.State(load_characters_csv("en")[0])
394
+ category = gr.State(load_characters_csv("en")[1])
395
+
396
+ with Translate(trans_file, lang, placeholder_langs=["en", "zh", "ja", "ko"]):
397
+ gr.Markdown(value=gettext(header))
398
+ with gr.Group():
399
+ initial_characters = get_characters(
400
+ kind="原神", lang="zh", all_characters=all_characters_state.value
401
+ )
402
+ characters = gr.State(initial_characters)
403
+ selected_characters = gr.State([])
404
+ current_character = gr.State(None)
405
+
406
+ with gr.Tab(gettext("Synthesis Voice")):
407
+ with gr.Blocks():
408
+ with gr.Row():
409
+ kind = gr.Dropdown(
410
+ choices=category.value,
411
+ value=category.value[0],
412
+ label=gettext("Select character category"),
413
+ )
414
+ query = gr.Textbox(
415
+ label=gettext("Search character"),
416
+ value="",
417
+ lines=1,
418
+ max_lines=1,
419
+ interactive=True,
420
+ )
421
+ with gr.Blocks():
422
+ gallery = gr.Gallery(
423
+ value=[
424
+ [char["头像"], char["名称"]] for char in characters.value
425
+ ],
426
+ show_label=False,
427
+ elem_id="character_gallery",
428
+ columns=[11],
429
+ object_fit="contain",
430
+ height="auto",
431
+ interactive=False,
432
+ allow_preview=False,
433
+ selected_index=None,
434
+ )
435
+ with gr.Row():
436
+ character_name = gr.Textbox(
437
+ label=gettext("Currently selected character"),
438
+ interactive=False,
439
+ max_lines=1,
440
+ )
441
+ info_type = gr.Dropdown(choices=[], label=gettext("Select emotion"))
442
+ with gr.Row():
443
+ add_voice_button = gr.Button(
444
+ gettext("Add new voice"), variant="primary"
445
+ )
446
+
447
+ selected_chars_container = gr.Column(
448
+ elem_id="selected_chars_container", visible=False
449
+ )
450
+
451
+ with selected_chars_container:
452
+ gr.Markdown(gettext("### Selected characters"))
453
+ selected_chars_rows = []
454
+ for i in range(5): # 假设最多选择5个角色
455
+ with gr.Row() as row:
456
+ name = gr.Textbox(
457
+ label=gettext("Name"), interactive=False, max_lines=1
458
+ )
459
+ emotion = gr.Textbox(
460
+ label=gettext("Emotion"), interactive=False, max_lines=1
461
+ )
462
+ delete_btn = gr.Button(gettext("Delete"), scale=0)
463
+ selected_chars_rows.append((name, emotion, delete_btn, row))
464
+
465
+ with gr.Row():
466
+ with gr.Column():
467
+ text = gr.Textbox(
468
+ label=gettext("Text to synthesize"),
469
+ value="",
470
+ lines=10,
471
+ max_lines=10,
472
+ )
473
+ inference_button = gr.Button(
474
+ gettext("🎉 Synthesize Voice 🎉"), variant="primary", size="lg"
475
+ )
476
+ with gr.Column():
477
+ prompt_audio = gr.Audio(
478
+ label=gettext("Reference audio for synthesis"),
479
+ interactive=False,
480
+ type="numpy",
481
+ )
482
+ output = gr.Audio(
483
+ label=gettext("Output audio"), interactive=False, type="numpy"
484
+ )
485
+ cost_time = gr.Textbox(
486
+ label=gettext("Synthesis time"),
487
+ interactive=False,
488
+ show_label=False,
489
+ max_lines=1,
490
+ )
491
+ try:
492
+ inference_button.click(
493
+ fn=generate,
494
+ inputs=[current_character, selected_characters, text, lang],
495
+ outputs=[output, cost_time],
496
+ )
497
+ except gr.Error as e:
498
+ gr.Error(e)
499
+ except Exception as e:
500
+ pass
501
+
502
+ with gr.Tab(gettext("Create Voice")):
503
+ with gr.Row():
504
+ avatar = gr.Image(label=gettext("Avatar"), interactive=True, type="pil", image_mode="RGBA")
505
+ with gr.Column():
506
+ with gr.Row():
507
+ name = gr.Textbox(
508
+ label=gettext("Name"), interactive=True, max_lines=1
509
+ )
510
+ emotion = gr.Textbox(
511
+ label=gettext("Emotion\n(Happy, Sad, Angry)"), interactive=True, max_lines=1
512
+ )
513
+ tags = gr.Textbox(
514
+ label=gettext("Tags\n(Genshin, Cute, Girl, Boy, etc.)"), interactive=True, max_lines=1
515
+ )
516
+ gender = gr.Dropdown(
517
+ label=gettext("Gender"),
518
+ choices=[
519
+ (gettext("Male"), "male"),
520
+ (gettext("Female"), "female"),
521
+ (gettext("Non-Binary"), "non-binary"),
522
+ ],
523
+ interactive=True,
524
+ )
525
+ audio_data = gr.Audio(label=gettext("Prompt Audio(min 3.2s, max 8s)"), interactive=True)
526
+ create_button = gr.Button(gettext("Create Voice"), variant="primary")
527
+
528
+ gr.Markdown(gettext(terms))
529
+ # -------------- 绑定事件 --------------
530
+
531
+ lang.change(
532
+ fn=update_all_characters,
533
+ inputs=[lang, category],
534
+ outputs=[all_characters_state, characters, gallery, category, kind],
535
+ )
536
+
537
+ demo.load(update_all_characters, inputs=[lang, category], outputs=[all_characters_state, characters, gallery, category, kind])
538
+
539
+ add_voice_button.click(
540
+ fn=add_new_voice,
541
+ inputs=[
542
+ current_character,
543
+ selected_characters,
544
+ kind,
545
+ lang,
546
+ all_characters_state,
547
+ ],
548
+ outputs=[
549
+ current_character,
550
+ character_name,
551
+ info_type,
552
+ selected_characters,
553
+ characters,
554
+ gallery,
555
+ selected_chars_container,
556
+ all_characters_state,
557
+ ],
558
+ ).then(
559
+ fn=update_selected_chars_display,
560
+ inputs=[selected_characters],
561
+ outputs=[item for row in selected_chars_rows for item in row],
562
+ )
563
+
564
+ gallery.select(
565
+ fn=on_select,
566
+ inputs=[characters, selected_characters, all_characters_state],
567
+ outputs=[character_name, info_type, current_character, selected_characters],
568
+ ).then(
569
+ fn=update_prompt_audio, inputs=[current_character], outputs=[prompt_audio]
570
+ )
571
+
572
+ info_type.change(
573
+ fn=update_character_info,
574
+ inputs=[character_name, info_type, current_character, all_characters_state],
575
+ outputs=[current_character, all_characters_state],
576
+ ).then(
577
+ fn=update_prompt_audio, inputs=[current_character], outputs=[prompt_audio]
578
+ )
579
+
580
+ for i, (_, _, delete_btn, _) in enumerate(selected_chars_rows):
581
+ delete_btn.click(
582
+ fn=remove_character,
583
+ inputs=[gr.Number(value=i, visible=False), selected_characters],
584
+ outputs=[selected_characters, selected_chars_container],
585
+ ).then(
586
+ fn=update_selected_chars_display,
587
+ inputs=[selected_characters],
588
+ outputs=[item for row in selected_chars_rows for item in row],
589
+ )
590
+
591
+ kind.change(
592
+ fn=update_gallery,
593
+ inputs=[kind, query, all_characters_state],
594
+ outputs=[characters, gallery, all_characters_state],
595
+ )
596
+
597
+ query.change(
598
+ fn=update_gallery,
599
+ inputs=[kind, query, all_characters_state],
600
+ outputs=[characters, gallery, all_characters_state],
601
+ )
602
+
603
+ create_button.click(
604
+ fn=create_voice,
605
+ inputs=[avatar, name, emotion, tags, gender, audio_data, lang],
606
+ outputs=[avatar, name, emotion, tags, gender, audio_data],
607
+ )
608
+
609
+
610
+ if __name__ == "__main__":
611
+ demo.queue(default_concurrency_limit=None).launch(
612
+ show_api=False
613
+ )
i18n/i18n.py CHANGED
@@ -225,7 +225,10 @@ def translate_blocks(
225
  TranslateContext.add_translation(translation)
226
 
227
  def on_load(request: gr.Request):
228
- return get_lang_from_request(request)
 
 
 
229
 
230
  def on_lang_change(request: gr.Request, lang: str):
231
  TranslateContext.lang_per_session[request.session_hash] = lang
 
225
  TranslateContext.add_translation(translation)
226
 
227
  def on_load(request: gr.Request):
228
+ lang = get_lang_from_request(request)
229
+ if lang not in translation.keys():
230
+ lang = "en"
231
+ return lang
232
 
233
  def on_lang_change(request: gr.Request, lang: str):
234
  TranslateContext.lang_per_session[request.session_hash] = lang