# Spaces: faelfernandes / TTS-TSM (Running)
# File size: 7,485 Bytes

from fastapi import FastAPI, Response
from fastapi.responses import HTMLResponse
import edge_tts
import asyncio
import uvicorn
from pathlib import Path
import os

# ASGI application object; the route decorators below register handlers on it,
# and it is passed to uvicorn.run() when this file is executed as a script.
app = FastAPI()

# Single-page UI returned by the "/" route: a textarea, a radio group of
# voices, and a script that POSTs {text, voice} as JSON to /synthesize and
# plays the returned audio blob.
#
# Markup invariants worth keeping when editing:
#   * exactly one radio input is pre-checked, and it is the same voice the
#     /synthesize handler falls back to ("pt-BR-AntonioNeural");
#   * every voice option is wrapped in its own <label class="voice-option">.
HTML_CONTENT = """
<!DOCTYPE html>
<html lang="pt-BR">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>TSM - Texto em Voz</title>
    <style>
        * {
            margin: 0;
            padding: 0;
            box-sizing: border-box;
            font-family: Arial, sans-serif;
        }

        body {
            background-color: #f0f0f0;
            padding: 20px;
        }

        .container {
            max-width: 800px;
            margin: 0 auto;
            background: white;
            padding: 30px;
            border-radius: 10px;
            box-shadow: 0 2px 10px rgba(0,0,0,0.1);
        }

        h1 {
            color: #2c3e50;
            text-align: center;
            margin-bottom: 20px;
        }

        .subtitle {
            text-align: center;
            color: #666;
            margin-bottom: 30px;
        }

        .input-group {
            display: grid;
            grid-template-columns: 2fr 1fr;
            gap: 20px;
            margin-bottom: 20px;
        }

        @media (max-width: 768px) {
            .input-group {
                grid-template-columns: 1fr;
            }
        }

        textarea {
            width: 100%;
            height: 150px;
            padding: 15px;
            border: 1px solid #ddd;
            border-radius: 5px;
            resize: vertical;
            font-size: 16px;
        }

        .voice-selector {
            padding: 20px;
            background: #f8f9fa;
            border-radius: 5px;
        }

        .voice-option {
            display: block;
            margin: 10px 0;
            cursor: pointer;
        }

        .convert-btn {
            display: block;
            width: 100%;
            padding: 15px;
            background: #2196F3;
            color: white;
            border: none;
            border-radius: 5px;
            font-size: 16px;
            cursor: pointer;
            transition: background 0.3s;
            margin: 20px 0;
        }

        .convert-btn:hover {
            background: #1976D2;
        }

        .convert-btn:disabled {
            background: #ccc;
            cursor: not-allowed;
        }

        audio {
            width: 100%;
            margin: 20px 0;
        }

        .footer {
            text-align: center;
            margin-top: 30px;
            color: #666;
            font-size: 14px;
        }

        #loading {
            display: none;
            text-align: center;
            margin: 10px 0;
            color: #666;
        }
    </style>
</head>
<body>
    <div class="container">
        <h1>TSM - Texto em Voz</h1>
        <p class="subtitle">Converta texto em fala usando vozes em português e multilíngues</p>

        <div class="input-group">
            <div>
                <textarea id="text-input" placeholder="Digite o texto para converter em fala..."></textarea>
            </div>
            
            <div class="voice-selector">
                <h3>Escolha a voz:</h3>
                <label class="voice-option">
                    <input type="radio" name="voice" value="pt-BR-AntonioNeural" checked>
                    Antonio
                </label>
                <label class="voice-option">
                    <input type="radio" name="voice" value="en-US-AndrewMultilingualNeural">
                    Andrew Multilingual
                </label>
                <label class="voice-option">
                    <input type="radio" name="voice" value="en-US-BrianMultilingualNeural">
                    Brian Multilingual
                </label>
                <label class="voice-option">
                    <input type="radio" name="voice" value="pt-BR-FranciscaNeural">
                    Francisca
                </label>
                <label class="voice-option">
                    <input type="radio" name="voice" value="pt-BR-ThalitaNeural">
                    Thalita
                </label>
                <label class="voice-option">
                    <input type="radio" name="voice" value="en-US-AvaMultilingualNeural">
                    Ava Multilingual
                </label>
                <label class="voice-option">
                    <input type="radio" name="voice" value="en-US-EmmaMultilingualNeural">
                    Emma Multilingual
                </label>
            </div>
        </div>

        <button id="convert-btn" class="convert-btn">Converter para Áudio</button>
        <div id="loading">Gerando áudio...</div>
        <audio id="audio-output" controls style="display: none;"></audio>

        <div class="footer">
            <p>Desenvolvido por [TSM LTDA] © 2022-2024</p>
            <p>Powered by Azure Text-to-Speech</p>
        </div>
    </div>

    <script>
        const textInput = document.getElementById('text-input');
        const convertBtn = document.getElementById('convert-btn');
        const audioOutput = document.getElementById('audio-output');
        const loading = document.getElementById('loading');

        convertBtn.addEventListener('click', async () => {
            const text = textInput.value.trim();
            if (!text) {
                alert('Por favor, digite algum texto para converter.');
                return;
            }

            const voice = document.querySelector('input[name="voice"]:checked').value;
            
            loading.style.display = 'block';
            convertBtn.disabled = true;
            audioOutput.style.display = 'none';

            try {
                const response = await fetch('/synthesize', {
                    method: 'POST',
                    headers: {
                        'Content-Type': 'application/json',
                    },
                    body: JSON.stringify({ text, voice })
                });

                if (!response.ok) {
                    throw new Error('Erro ao gerar áudio');
                }

                const audioBlob = await response.blob();
                const audioUrl = URL.createObjectURL(audioBlob);
                
                // Release the previous clip's blob URL so repeated
                // conversions do not keep every old audio blob alive.
                if (audioOutput.src.startsWith('blob:')) {
                    URL.revokeObjectURL(audioOutput.src);
                }

                audioOutput.src = audioUrl;
                audioOutput.style.display = 'block';
            } catch (error) {
                alert('Erro ao converter texto para fala: ' + error.message);
            } finally {
                loading.style.display = 'none';
                convertBtn.disabled = false;
            }
        });
    </script>
</body>
</html>
"""

@app.get("/", response_class=HTMLResponse)
async def read_root():
    """Serve the text-to-speech page stored in ``HTML_CONTENT``."""
    page = HTML_CONTENT
    return page

@app.post("/synthesize")
async def synthesize_speech(request_data: dict):
    """Synthesize the posted text into MP3 audio using an edge-tts voice.

    Args:
        request_data: Parsed JSON body. ``"text"`` is the text to speak;
            ``"voice"`` is the edge-tts voice name and falls back to
            ``"pt-BR-AntonioNeural"`` when missing, null, or empty.

    Returns:
        A ``Response`` with the audio bytes and media type ``audio/mpeg`` on
        success; a 400 response when ``"text"`` is not a non-empty string or
        ``"voice"`` is not a string; a 500 response whose body is the
        exception message when synthesis itself fails.
    """
    text = request_data.get("text", "")
    # `or` also covers an explicit null / empty voice, not just a missing key.
    voice = request_data.get("voice") or "pt-BR-AntonioNeural"

    # Reject unusable input up front instead of letting it surface as a 500
    # from string concatenation or from the TTS service.
    if not isinstance(text, str) or not text.strip():
        return Response(
            content='Field "text" must be a non-empty string.',
            status_code=400,
        )
    if not isinstance(voice, str):
        return Response(
            content='Field "voice" must be a string.',
            status_code=400,
        )

    try:
        communicate = edge_tts.Communicate(text, voice)
        # Collect the audio in memory rather than going through a temp file
        # named after hash(text + voice): that path was shared by concurrent
        # identical requests (one could delete it while another was reading)
        # and was left on disk whenever synthesis raised part-way through.
        audio_chunks = []
        async for chunk in communicate.stream():
            # The stream also yields non-audio metadata chunks; skip them.
            if chunk["type"] == "audio":
                audio_chunks.append(chunk["data"])
    except Exception as e:
        # Endpoint boundary: report any synthesis failure as a 500, keeping
        # the original contract of returning the error text in the body.
        return Response(content=str(e), status_code=500)

    return Response(content=b"".join(audio_chunks), media_type="audio/mpeg")

if __name__ == "__main__":
    # Script entry point: serve the app with uvicorn, listening on every
    # network interface at port 7860.
    uvicorn.run(
        app,
        port=7860,
        host="0.0.0.0",
    )