faelfernandes committed on
Commit
ed136ea
1 Parent(s): 2400931

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +247 -85
app.py CHANGED
@@ -1,96 +1,258 @@
1
- import gradio as gr
 
 
2
  import edge_tts
3
  import asyncio
 
 
4
  import os
5
 
6
- # Vozes disponíveis
7
- VOICES = {
8
- "Antonio": "pt-BR-AntonioNeural",
9
- "Andrew (Multilingual)": "en-US-AndrewMultilingualNeural",
10
- "Brian (Multilingual)": "en-US-BrianMultilingualNeural",
11
- "Francisca": "pt-BR-FranciscaNeural",
12
- "Thalita": "pt-BR-ThalitaNeural",
13
- "Ava (Multilingual)": "en-US-AvaMultilingualNeural",
14
- "Emma (Multilingual)": "en-US-EmmaMultilingualNeural"
15
- }
16
-
17
- async def text_to_speech(text, voice):
18
- voice_key = VOICES[voice]
19
- output_file = f"output_{hash(text)}.mp3"
20
- communicate = edge_tts.Communicate(text, voice_key)
21
- await communicate.save(output_file)
22
- return output_file
23
-
24
- async def process_tts(text, voice):
25
- return await text_to_speech(text, voice)
26
-
27
- css = """
28
- .gradio-container {
29
- background: white !important;
30
- }
31
-
32
- .label {
33
- color: black !important;
34
- }
35
-
36
- .gr-box {
37
- background: white !important;
38
- color: black !important;
39
- }
40
-
41
- .gr-input {
42
- color: black !important;
43
- }
44
-
45
- .gr-button {
46
- background: #2196F3 !important;
47
- color: white !important;
48
- }
49
-
50
- .gr-form {
51
- background: white !important;
52
- color: black !important;
53
- }
54
-
55
- .footer {
56
- text-align: center;
57
- margin-top: 50px;
58
- color: #666 !important;
59
- font-size: 0.9em;
60
- }
61
  """
62
 
63
- with gr.Blocks(theme=gr.themes.Light(), css=css) as iface:
64
- with gr.Column():
65
- gr.Markdown("# TSM - Texto em Voz")
66
- gr.Markdown("Converta texto em fala usando vozes em português e multilíngues.")
 
 
 
 
 
 
 
 
67
 
68
- with gr.Row():
69
- with gr.Column(scale=2):
70
- text_input = gr.Textbox(
71
- label="Digite o texto para converter em fala",
72
- placeholder="Escreva seu texto aqui...",
73
- lines=5
74
- )
75
- with gr.Column(scale=1):
76
- voice = gr.Radio(
77
- choices=list(VOICES.keys()),
78
- label="Escolha a voz",
79
- value="Francisca"
80
- )
81
 
82
- convert_button = gr.Button("Converter para Áudio")
83
- audio_output = gr.Audio(label="Áudio Gerado", interactive=True)
 
 
 
 
84
 
85
- gr.Markdown(
86
- """
87
- <div class="footer">
88
- <p>Desenvolvido por [TSM LTDA] © 2022-2024</p>
89
- <p>Powered by Azure Text-to-Speech</p>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
  </div>
91
- """
92
- )
93
-
94
- convert_button.click(process_tts, inputs=[text_input, voice], outputs=[audio_output])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
 
96
- iface.launch(show_api=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, Response
2
+ from fastapi.staticfiles import StaticFiles
3
+ from fastapi.responses import HTMLResponse
4
  import edge_tts
5
  import asyncio
6
+ import uvicorn
7
+ from pathlib import Path
8
  import os
9
 
10
app = FastAPI()

# Serve static files.
# StaticFiles validates its directory when it is constructed and raises if the
# directory is missing, which would abort application startup. Mount the route
# only when the directory actually exists (path is relative to the working
# directory, as before).
STATIC_DIR = Path("static")
if STATIC_DIR.is_dir():
    app.mount("/static", StaticFiles(directory=str(STATIC_DIR)), name="static")
14
+
15
+ # HTML como string (vou mostrar o conteúdo abaixo)
16
+ HTML_CONTENT = """
17
+ <!DOCTYPE html>
18
+ ... # (conteúdo HTML que mostrarei em seguida)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  """
20
 
21
@app.get("/", response_class=HTMLResponse)
async def read_root():
    """Return the page markup held in ``HTML_CONTENT`` for the root URL."""
    return HTML_CONTENT
24
+
25
@app.post("/synthesize")
async def synthesize_speech(request_data: dict):
    """Convert the submitted text to MP3 audio with edge-tts.

    Args:
        request_data: Parsed JSON body. ``"text"`` is the text to speak and
            ``"voice"`` is the edge-tts voice name; the voice falls back to
            ``"pt-BR-FranciscaNeural"`` when absent or empty.

    Returns:
        A ``Response`` carrying the audio bytes as ``audio/mpeg`` on success,
        a 400 response when the text is missing, blank, or not a string, and
        a 500 response containing the error message when synthesis fails.
    """
    text = request_data.get("text") or ""
    voice = request_data.get("voice") or "pt-BR-FranciscaNeural"

    # Reject unusable input up front instead of letting it surface as a
    # generic 500 from the synthesis call.
    if not isinstance(text, str) or not text.strip():
        return Response(
            content="Text must be a non-empty string.",
            status_code=400,
            media_type="text/plain",
        )
    if not isinstance(voice, str):
        return Response(
            content="Voice must be a string.",
            status_code=400,
            media_type="text/plain",
        )

    try:
        communicate = edge_tts.Communicate(text, voice)

        # Collect the audio in memory rather than writing a temp file named
        # after hash(text + voice): identical concurrent requests shared that
        # file name (one could delete it while the other was still reading),
        # and the file was left behind whenever an error occurred before
        # cleanup.
        audio = bytearray()
        async for chunk in communicate.stream():
            # The stream also yields non-audio metadata chunks; keep audio only.
            if chunk["type"] == "audio":
                audio.extend(chunk["data"])
    except Exception as e:
        # Top-level boundary of the endpoint: report the failure to the client.
        return Response(content=str(e), status_code=500)

    return Response(content=bytes(audio), media_type="audio/mpeg")
48
+
49
# Complete single-page UI for the text-to-speech app.
# The markup must live inside a string: as bare text in the module it is a
# syntax error that prevents the whole file from being imported. It is bound
# here, before the server is started below, so that it replaces any
# placeholder value assigned to HTML_CONTENT earlier in the module.
HTML_CONTENT = """<!DOCTYPE html>
<html lang="pt-BR">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>TSM - Texto em Voz</title>
    <style>
        * {
            margin: 0;
            padding: 0;
            box-sizing: border-box;
            font-family: Arial, sans-serif;
        }

        body {
            background-color: #f0f0f0;
            padding: 20px;
        }

        .container {
            max-width: 800px;
            margin: 0 auto;
            background: white;
            padding: 30px;
            border-radius: 10px;
            box-shadow: 0 2px 10px rgba(0,0,0,0.1);
        }

        h1 {
            color: #2c3e50;
            text-align: center;
            margin-bottom: 20px;
        }

        .subtitle {
            text-align: center;
            color: #666;
            margin-bottom: 30px;
        }

        .input-group {
            display: grid;
            grid-template-columns: 2fr 1fr;
            gap: 20px;
            margin-bottom: 20px;
        }

        @media (max-width: 768px) {
            .input-group {
                grid-template-columns: 1fr;
            }
        }

        textarea {
            width: 100%;
            height: 150px;
            padding: 15px;
            border: 1px solid #ddd;
            border-radius: 5px;
            resize: vertical;
            font-size: 16px;
        }

        .voice-selector {
            padding: 20px;
            background: #f8f9fa;
            border-radius: 5px;
        }

        .voice-option {
            display: block;
            margin: 10px 0;
            cursor: pointer;
        }

        .convert-btn {
            display: block;
            width: 100%;
            padding: 15px;
            background: #2196F3;
            color: white;
            border: none;
            border-radius: 5px;
            font-size: 16px;
            cursor: pointer;
            transition: background 0.3s;
            margin: 20px 0;
        }

        .convert-btn:hover {
            background: #1976D2;
        }

        .convert-btn:disabled {
            background: #ccc;
            cursor: not-allowed;
        }

        audio {
            width: 100%;
            margin: 20px 0;
        }

        .footer {
            text-align: center;
            margin-top: 30px;
            color: #666;
            font-size: 14px;
        }

        #loading {
            display: none;
            text-align: center;
            margin: 10px 0;
            color: #666;
        }
    </style>
</head>
<body>
    <div class="container">
        <h1>TSM - Texto em Voz</h1>
        <p class="subtitle">Converta texto em fala usando vozes em português e multilíngues</p>

        <div class="input-group">
            <div>
                <textarea id="text-input" placeholder="Digite o texto para converter em fala..."></textarea>
            </div>

            <div class="voice-selector">
                <h3>Escolha a voz:</h3>
                <label class="voice-option">
                    <input type="radio" name="voice" value="pt-BR-AntonioNeural" checked>
                    Antonio
                </label>
                <label class="voice-option">
                    <input type="radio" name="voice" value="pt-BR-FranciscaNeural">
                    Francisca
                </label>
                <label class="voice-option">
                    <input type="radio" name="voice" value="pt-BR-ThalitaNeural">
                    Thalita
                </label>
                <label class="voice-option">
                    <input type="radio" name="voice" value="en-US-AndrewMultilingualNeural">
                    Andrew (Multilingual)
                </label>
            </div>
        </div>

        <button id="convert-btn" class="convert-btn">Converter para Áudio</button>
        <div id="loading">Gerando áudio...</div>
        <audio id="audio-output" controls style="display: none;"></audio>

        <div class="footer">
            <p>Desenvolvido por [TSM LTDA] © 2022-2024</p>
            <p>Powered by Azure Text-to-Speech</p>
        </div>
    </div>

    <script>
        const textInput = document.getElementById('text-input');
        const convertBtn = document.getElementById('convert-btn');
        const audioOutput = document.getElementById('audio-output');
        const loading = document.getElementById('loading');

        convertBtn.addEventListener('click', async () => {
            const text = textInput.value.trim();
            if (!text) {
                alert('Por favor, digite algum texto para converter.');
                return;
            }

            const voice = document.querySelector('input[name="voice"]:checked').value;

            // Mostrar loading e desabilitar botão
            loading.style.display = 'block';
            convertBtn.disabled = true;
            audioOutput.style.display = 'none';

            try {
                const response = await fetch('/synthesize', {
                    method: 'POST',
                    headers: {
                        'Content-Type': 'application/json',
                    },
                    body: JSON.stringify({ text, voice })
                });

                if (!response.ok) {
                    throw new Error('Erro ao gerar áudio');
                }

                const audioBlob = await response.blob();
                const audioUrl = URL.createObjectURL(audioBlob);

                audioOutput.src = audioUrl;
                audioOutput.style.display = 'block';
            } catch (error) {
                alert('Erro ao converter texto para fala: ' + error.message);
            } finally {
                loading.style.display = 'none';
                convertBtn.disabled = false;
            }
        });
    </script>
</body>
</html>
"""

if __name__ == "__main__":
    # Start the server only when the file is executed directly; it listens on
    # all interfaces, port 7860.
    uvicorn.run(app, host="0.0.0.0", port=7860)