TTS-TSM / app.py
faelfernandes's picture
Update app.py
ed136ea verified
raw
history blame
7.41 kB
import asyncio
import os
import tempfile
from pathlib import Path

import edge_tts
import uvicorn
from fastapi import FastAPI, Response
from fastapi.responses import HTMLResponse
from fastapi.staticfiles import StaticFiles
# ASGI application object; the route decorators below register handlers on it.
app = FastAPI()
# Serve static files from the local "static" directory under the /static URL prefix.
# NOTE(review): presumably a ./static directory must exist at startup for this
# mount to succeed -- confirm against the deployment layout.
app.mount("/static", StaticFiles(directory="static"), name="static")
# Page markup returned verbatim by the root route.
# NOTE(review): as written, this literal only holds a placeholder line; the
# real markup appears to be the HTML document found further down in this file.
# Confirm and move that markup here (or into the static directory).
HTML_CONTENT = """
<!DOCTYPE html>
... # (conteúdo HTML que mostrarei em seguida)
"""
@app.get("/", response_class=HTMLResponse)
async def read_root():
    """Serve the application's single page.

    Returns:
        The module-level ``HTML_CONTENT`` string, sent as an HTML response.
    """
    page = HTML_CONTENT
    return page
@app.post("/synthesize")
async def synthesize_speech(request_data: dict):
    """Convert the posted text to speech and return it as MP3 audio.

    Args:
        request_data: JSON request body. Reads the ``"text"`` key (the text
            to speak) and the optional ``"voice"`` key (edge-tts voice name,
            defaulting to ``"pt-BR-FranciscaNeural"``).

    Returns:
        A ``Response`` carrying the MP3 bytes with media type ``audio/mpeg``
        on success; a 400 response when ``text`` is missing, blank, or not a
        string; a 500 response whose body is the error message when
        synthesis fails.
    """
    text = request_data.get("text", "")
    voice = request_data.get("voice", "pt-BR-FranciscaNeural")

    # Reject unusable input up front so the client gets a clear 4xx instead
    # of an opaque synthesis failure.
    if not isinstance(text, str) or not text.strip():
        return Response(
            content="Field 'text' must be a non-empty string.",
            status_code=400,
        )

    output_path = None
    try:
        # mkstemp gives every request its own file. The previous
        # hash(text + voice) name was shared by identical concurrent
        # requests, so one request could delete the file another was
        # still reading.
        fd, tmp_name = tempfile.mkstemp(prefix="tts_", suffix=".mp3")
        output_path = Path(tmp_name)
        # Only the path is needed; edge-tts opens the file itself.
        os.close(fd)

        communicate = edge_tts.Communicate(text, voice)
        await communicate.save(str(output_path))
        audio_data = output_path.read_bytes()
    except Exception as e:
        # Top-level boundary of the endpoint: report the failure to the
        # client rather than letting the exception propagate.
        return Response(content=str(e), status_code=500)
    finally:
        # Always remove the temporary file, including when synthesis or the
        # read fails part-way through.
        if output_path is not None:
            output_path.unlink(missing_ok=True)

    return Response(content=audio_data, media_type="audio/mpeg")
if __name__ == "__main__":
    # Script entry point: start uvicorn on all interfaces, port 7860.
    server_options = {"host": "0.0.0.0", "port": 7860}
    uvicorn.run(app, **server_options)
<!DOCTYPE html>
<html lang="pt-BR">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>TSM - Texto em Voz</title>
<style>
/* Global reset: no default margins or padding, border-box sizing, Arial everywhere. */
* {
margin: 0;
padding: 0;
box-sizing: border-box;
font-family: Arial, sans-serif;
}
/* Light grey page background with some breathing room around the card. */
body {
background-color: #f0f0f0;
padding: 20px;
}
/* Centered white card, at most 800px wide, that wraps the whole UI. */
.container {
max-width: 800px;
margin: 0 auto;
background: white;
padding: 30px;
border-radius: 10px;
box-shadow: 0 2px 10px rgba(0,0,0,0.1);
}
/* Page title. */
h1 {
color: #2c3e50;
text-align: center;
margin-bottom: 20px;
}
/* Descriptive line shown under the title. */
.subtitle {
text-align: center;
color: #666;
margin-bottom: 30px;
}
/* Two-column grid: text area (2/3) next to the voice picker (1/3). */
.input-group {
display: grid;
grid-template-columns: 2fr 1fr;
gap: 20px;
margin-bottom: 20px;
}
/* On viewports up to 768px wide, collapse the grid to a single column. */
@media (max-width: 768px) {
.input-group {
grid-template-columns: 1fr;
}
}
/* Text input: full width of its column, resizable vertically only. */
textarea {
width: 100%;
height: 150px;
padding: 15px;
border: 1px solid #ddd;
border-radius: 5px;
resize: vertical;
font-size: 16px;
}
/* Panel holding the voice radio buttons. */
.voice-selector {
padding: 20px;
background: #f8f9fa;
border-radius: 5px;
}
/* One radio option per line; the whole label is clickable. */
.voice-option {
display: block;
margin: 10px 0;
cursor: pointer;
}
/* Full-width primary action button. */
.convert-btn {
display: block;
width: 100%;
padding: 15px;
background: #2196F3;
color: white;
border: none;
border-radius: 5px;
font-size: 16px;
cursor: pointer;
transition: background 0.3s;
margin: 20px 0;
}
/* Darker blue on hover. */
.convert-btn:hover {
background: #1976D2;
}
/* Greyed-out look while the button is disabled. */
.convert-btn:disabled {
background: #ccc;
cursor: not-allowed;
}
/* Audio player spans the card width. */
audio {
width: 100%;
margin: 20px 0;
}
/* Small, muted credits at the bottom of the card. */
.footer {
text-align: center;
margin-top: 30px;
color: #666;
font-size: 14px;
}
/* Progress message; hidden by default and shown by the page script during a request. */
#loading {
display: none;
text-align: center;
margin: 10px 0;
color: #666;
}
</style>
</head>
<body>
<!-- Main card: title, text input, voice picker, convert button, player, and footer. -->
<div class="container">
<h1>TSM - Texto em Voz</h1>
<p class="subtitle">Converta texto em fala usando vozes em português e multilíngues</p>
<!-- Input row: text area on the left, voice selection on the right. -->
<div class="input-group">
<div>
<!-- Text to synthesize; read by the page script through its id. -->
<textarea id="text-input" placeholder="Digite o texto para converter em fala..."></textarea>
</div>
<!-- Voice choices: each radio value is sent to the server as the "voice" field. -->
<div class="voice-selector">
<h3>Escolha a voz:</h3>
<label class="voice-option">
<input type="radio" name="voice" value="pt-BR-AntonioNeural" checked>
Antonio
</label>
<label class="voice-option">
<input type="radio" name="voice" value="pt-BR-FranciscaNeural">
Francisca
</label>
<label class="voice-option">
<input type="radio" name="voice" value="pt-BR-ThalitaNeural">
Thalita
</label>
<label class="voice-option">
<input type="radio" name="voice" value="en-US-AndrewMultilingualNeural">
Andrew (Multilingual)
</label>
</div>
</div>
<!-- Triggers the POST to /synthesize from the page script. -->
<button id="convert-btn" class="convert-btn">Converter para Áudio</button>
<!-- Progress message; the page script shows it while a request is in flight. -->
<div id="loading">Gerando áudio...</div>
<!-- Player for the generated audio; stays hidden until a clip has been loaded. -->
<audio id="audio-output" controls style="display: none;"></audio>
<div class="footer">
<p>Desenvolvido por [TSM LTDA] © 2022-2024</p>
<p>Powered by Azure Text-to-Speech</p>
</div>
</div>
<script>
// Cache the DOM nodes the click handler works with.
const textInput = document.getElementById('text-input');
const convertBtn = document.getElementById('convert-btn');
const audioOutput = document.getElementById('audio-output');
const loading = document.getElementById('loading');

// Object URL of the clip currently loaded in the player, if any. Tracked so
// it can be revoked when replaced; otherwise every generated audio blob would
// stay in memory until the page is closed.
let currentAudioUrl = null;

// Send the text and the selected voice to /synthesize and load the returned
// audio into the player.
convertBtn.addEventListener('click', async () => {
    const text = textInput.value.trim();
    if (!text) {
        alert('Por favor, digite algum texto para converter.');
        return;
    }

    const voice = document.querySelector('input[name="voice"]:checked').value;

    // Show the progress message and block repeated clicks while the request runs.
    loading.style.display = 'block';
    convertBtn.disabled = true;
    audioOutput.style.display = 'none';

    try {
        const response = await fetch('/synthesize', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
            },
            body: JSON.stringify({ text, voice })
        });

        if (!response.ok) {
            throw new Error('Erro ao gerar áudio');
        }

        const audioBlob = await response.blob();

        // Release the previous clip's object URL before creating a new one.
        if (currentAudioUrl) {
            URL.revokeObjectURL(currentAudioUrl);
        }
        currentAudioUrl = URL.createObjectURL(audioBlob);

        audioOutput.src = currentAudioUrl;
        audioOutput.style.display = 'block';
    } catch (error) {
        alert('Erro ao converter texto para fala: ' + error.message);
    } finally {
        // Restore the UI whether the request succeeded or failed.
        loading.style.display = 'none';
        convertBtn.disabled = false;
    }
});
</script>
</body>
</html>