Yurii Paniv committed on
Commit
01e1229
1 Parent(s): b812929

Add initial model

Browse files
Files changed (6) hide show
  1. .gitignore +6 -0
  2. README.md +4 -1
  3. app.py +61 -14
  4. crh_tts/__init__.py +0 -0
  5. crh_tts/tts.py +84 -0
  6. requirements.txt +3 -1
.gitignore CHANGED
@@ -127,3 +127,9 @@ dmypy.json
127
 
128
  # Pyre type checker
129
  .pyre/
 
 
 
 
 
 
 
127
 
128
  # Pyre type checker
129
  .pyre/
130
+
131
+
132
+ # Model files
133
+ config.json
134
+ speakers.pth
135
+ model.pth
README.md CHANGED
@@ -19,4 +19,7 @@ Online demo: https://huggingface.co/spaces/robinhad/qirimli-tts
19
 
20
  # Attribution
21
 
22
- Transliteration: [prosvita/crh.transliteration](https://github.com/prosvita/crh.transliteration)
 
 
 
 
19
 
20
  # Attribution
21
 
22
+ - Model training - [Yurii Paniv @robinhad](https://github.com/robinhad)
23
+ - Crimean Tatar dataset - [Yehor Smoliakov @egorsmkv](https://github.com/egorsmkv)
24
+ - Huge thanks to Nuri, Arslan, and Kemal for lending their voices
25
+ - Transliteration: [prosvita/crh.transliteration](https://github.com/prosvita/crh.transliteration)
app.py CHANGED
@@ -1,41 +1,88 @@
1
- from unittest import result
2
  import gradio as gr
3
  from crh_transliterator.transliterator import transliterate
4
  from crh_preprocessor.preprocessor import preprocess
5
  from datetime import datetime
6
 
 
 
 
 
 
 
7
 
8
- def tts(text: str) -> str:
9
- result = transliterate(text)
10
- text = preprocess(result)
11
- print("============================")
12
- print("Original text:", text)
13
- print("Time:", datetime.utcnow())
14
- return text
 
15
 
16
 
17
  badge = (
18
  "https://visitor-badge-reloaded.herokuapp.com/badge?page_id=robinhad.qirimli-tts"
19
  )
20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  with open("README.md") as file:
22
  article = file.read()
23
  article = article[article.find("---\n", 4) + 5 : :]
24
 
 
25
  iface = gr.Interface(
26
  fn=tts,
27
  inputs=[
28
  gr.components.Textbox(
29
  label="Input",
30
- value="Please input your sentence.",
 
 
 
 
 
31
  ),
32
  ],
33
- outputs="text",
34
- examples=[
35
- ["Selâm! İşler nasıl?"],
36
- ["Sağlıqnen qalıñız! Sağlıqnen barıñız! "],
37
- ["Селям! Ишлер насыл?"],
38
  ],
 
 
39
  article=article + f'\n <center><img src="{badge}" alt="visitors badge"/></center>',
 
 
 
 
 
 
 
 
40
  )
41
  iface.launch()
 
 
1
  import gradio as gr
2
  from crh_transliterator.transliterator import transliterate
3
  from crh_preprocessor.preprocessor import preprocess
4
  from datetime import datetime
5
 
6
+ import tempfile
7
+ import gradio as gr
8
+ from datetime import datetime
9
+ from enum import Enum
10
+ from crh_tts.tts import TTS, Voices
11
+ from torch.cuda import is_available
12
 
13
+
14
class VoiceOption(Enum):
    """Captions of the selectable voices, one member per speaker.

    Each value is the exact Ukrainian label (speaker name, gender, emoji)
    offered as a choice in the voice selector.
    """

    # Female voice.
    Nuri = "Нурі (жіночий) 👩"

    # Male voices.
    Arslan = "Арслан (чоловічий) 👨"
    Kemal = "Кемаль (чоловічий) 👨"
18
+
19
+
20
# Log GPU availability once at start-up.
print(f"CUDA available? {is_available()}")

# URL of the visitor-counter image appended to the page article.
badge = "https://visitor-badge-reloaded.herokuapp.com/badge?page_id=robinhad.qirimli-tts"

# Single shared engine, built at import time; uses CUDA when it is available.
crh_tts = TTS(use_cuda=is_available())
28
+
29
+
30
def tts(text: str, voice: str):
    """Synthesize `text` with the selected voice and return the result.

    - `text` - input text; only the first 7200 characters are used.
    - `voice` - one of the `VoiceOption` captions.

    Returns a tuple `(wav_path, processed_text)`: the path of a temporary
    `.wav` file holding the audio, and the text returned by the engine.

    Raises `KeyError` when `voice` is not one of the `VoiceOption` captions.
    """
    import os  # local import: only needed to clean up after a failed synthesis

    print("============================")
    print("Original text:", text)
    print("Voice", voice)
    print("Time:", datetime.utcnow())

    # Translate the UI caption into the speaker id the engine expects.
    voice_mapping = {
        VoiceOption.Nuri.value: Voices.Nuri.value,
        VoiceOption.Arslan.value: Voices.Arslan.value,
        VoiceOption.Kemal.value: Voices.Kemal.value,
    }

    speaker_name = voice_mapping[voice]
    text_limit = 7200
    text = text[:text_limit]  # mitigate crashes on hf space
    result = transliterate(text)
    text = preprocess(result)

    # delete=False: the file must outlive this function so the caller can
    # read the audio from the returned path.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        try:
            _, text = crh_tts.tts(text, speaker_name, fp)
        except Exception:
            # Nobody will ever receive this path, so do not leave the
            # half-written file behind. Close first so removal also works
            # on platforms that refuse to delete open files.
            fp.close()
            os.remove(fp.name)
            raise
    return fp.name, text
52
+
53
+
54
# Load the README and keep only the part after its leading "---" delimited
# header, so the remainder can be shown as the page article.
# The encoding is explicit so non-ASCII text in README.md is decoded the
# same way regardless of the platform's default locale.
with open("README.md", encoding="utf-8") as file:
    article = file.read()
    # Search from index 4 to skip the opening "---" line, then drop the
    # closing delimiter line plus one more character.
    article = article[article.find("---\n", 4) + 5 : :]
57
 
58
+
59
# Assemble the web UI: text box and voice selector in, synthesized audio and
# the processed text out, then start serving it.
iface = gr.Interface(
    title="Кримськотатарський синтез мовлення",
    description="Кримськотатарський Text-to-Speech за допомогою Coqui TTS",
    fn=tts,
    inputs=[
        gr.components.Textbox(
            label="Input",
            value="Qırımtatarlar! Селям! Ишлер насыл?",
        ),
        gr.components.Radio(
            label="Голос",
            choices=[member.value for member in VoiceOption],
            value=VoiceOption.Nuri.value,
        ),
    ],
    outputs=[
        gr.components.Audio(label="Output"),
        gr.components.Textbox(label="Наголошений текст"),
    ],
    # Each example is a (text, voice caption) pair matching the two inputs.
    examples=[
        ["Selâm! İşler nasıl?", VoiceOption.Kemal.value],
        [
            "Qırımtatarlar üç subetnik gruppasından er birisiniñ (tatlar, noğaylar ve yalıboylular) öz şivesi bar.",
            VoiceOption.Arslan.value,
        ],
        ["Селям! Ишлер насыл?", VoiceOption.Nuri.value],
    ],
    # README body followed by the visitor badge image.
    article=article + f'\n <center><img src="{badge}" alt="visitors badge"/></center>',
)
iface.launch()
crh_tts/__init__.py ADDED
File without changes
crh_tts/tts.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from io import BytesIO
2
+ import requests
3
+ from os.path import exists, join
4
+ from TTS.utils.synthesizer import Synthesizer
5
+ from enum import Enum
6
+ from crh_preprocessor.preprocessor import preprocess
7
+ from torch import no_grad
8
+
9
+
10
class Voices(Enum):
    """Speaker identifiers the model can synthesize with.

    Each value is the speaker name string passed to the synthesizer.
    """

    Arslan = "arslan"
    Nuri = "nuri"
    Kemal = "kemal"
16
+
17
+
18
class TTS:
    """Text-to-speech engine wrapping a Coqui TTS `Synthesizer`.

    On construction the model, config and speaker files are fetched from the
    project's GitHub release (unless already present locally) and the
    synthesizer is created from them.
    """

    def __init__(self, use_cuda=False) -> None:
        """
        Set up the text-to-speech engine, from download to model creation.

        Model files are downloaded into (or reused from) the current
        directory.

        - `use_cuda` - passed to the synthesizer to enable GPU inference.
        """
        self.__setup_cache(use_cuda=use_cuda)

    def tts(self, text: str, voice: str, output_fp=None):
        """
        Run the Text-to-Speech engine and write the audio to `output_fp`.

        - `text` - your model input text.
        - `voice` - one of the predefined voice values from the `Voices` enum.
        - `output_fp` - file-like object to write the WAV data to. When
          omitted, a new in-memory `BytesIO` buffer is created for this call.

        Returns `(output_fp, text)`: the output object rewound to position 0
        and the preprocessed text that was actually synthesized.

        Raises `ValueError` when `voice` is not a known voice value.
        """
        valid_voices = [option.value for option in Voices]
        if voice not in valid_voices:
            raise ValueError(
                f"Invalid value for voice selected! Please use one of the following values: {', '.join(valid_voices)}."
            )

        # Create the buffer per call. A `BytesIO()` default in the signature
        # would be a single object shared by every call, so a later, shorter
        # result would still carry trailing bytes of an earlier one.
        if output_fp is None:
            output_fp = BytesIO()

        text = preprocess(text)

        # Inference only: no gradients are needed.
        with no_grad():
            wavs = self.synthesizer.tts(text, speaker_name=voice)
            self.synthesizer.save_wav(wavs, output_fp)

        # Rewind so the caller can read the audio from the start.
        output_fp.seek(0)

        return output_fp, text

    def __setup_cache(self, use_cuda=False):
        """Download the model files into the current directory (if missing) and build the synthesizer."""
        print("downloading uk/crh/vits-tts")
        release_number = "v0.0.1"
        model_link = f"https://github.com/robinhad/qirimli-tts/releases/download/{release_number}/model.pth"
        config_link = f"https://github.com/robinhad/qirimli-tts/releases/download/{release_number}/config.json"
        speakers_link = f"https://github.com/robinhad/qirimli-tts/releases/download/{release_number}/speakers.pth"

        cache_folder = "."

        model_path = join(cache_folder, "model.pth")
        config_path = join(cache_folder, "config.json")
        speakers_path = join(cache_folder, "speakers.pth")

        self.__download(model_link, model_path)
        self.__download(config_link, config_path)
        self.__download(speakers_link, speakers_path)

        # A constructor call either returns an instance or raises, so no
        # `is None` check is needed afterwards.
        self.synthesizer = Synthesizer(
            model_path, config_path, speakers_path, None, None, use_cuda=use_cuda
        )

    def __download(self, url, file_name):
        """Download `url` into local `file_name`, unless that file already exists.

        Raises the `requests` HTTP error when the server answers with an
        error status, instead of storing the error page as the file.
        """
        if exists(file_name):
            print(f"Found {file_name}. Skipping download...")
            return

        from os import replace  # local import: only needed when downloading

        print(f"Downloading {file_name}")
        r = requests.get(url, allow_redirects=True)
        # Without this, an error response body would be saved under
        # `file_name` and then be treated as a valid cached file forever.
        r.raise_for_status()

        # Write to a side file and rename at the end, so an interrupted
        # write never leaves a truncated file under the final name.
        partial_name = f"{file_name}.part"
        with open(partial_name, "wb") as file:
            file.write(r.content)
        replace(partial_name, file_name)
requirements.txt CHANGED
@@ -1 +1,3 @@
1
- gradio==3.6
 
 
 
1
+ gradio==3.6
2
+ torch>=1.13
3
+ TTS==0.8.0