File size: 2,588 Bytes
0a48c54 33c4cc8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 |
import gradio as gr
from pathlib import Path
from openai import OpenAI
import os
from dotenv import load_dotenv
# Load environment variables from .env file
load_dotenv()

# Validate the key BEFORE building the client: the OpenAI constructor rejects a
# missing key with its own error, so a check placed after construction would
# never get the chance to show this more helpful message.
_api_key = os.getenv("OPENAI_API_KEY")
if not _api_key:
    raise ValueError("Please set the OPENAI_API_KEY in your .env file")

# Initialize the OpenAI client
client = OpenAI(api_key=_api_key)
# Labels the prompt asks the model to emit, in the order results are returned.
_VERSION_LABELS = ("original", "emotional", "exaggerated")


def _parse_versions(response_text, original_text):
    """Pick the three labelled versions out of the model's reply.

    Only lines of the form ``<label>: <value>`` whose label is one of
    ``_VERSION_LABELS`` are used; list markers or markdown emphasis around the
    label are tolerated. Any other line (preamble, notes, blank lines) is
    ignored, so it can never be mistaken for a version.

    Raises:
        ValueError: if the emotional or exaggerated version is absent.
    """
    found = {}
    for line in response_text.splitlines():
        label, colon, value = line.partition(":")
        if not colon:
            continue
        # Drop list numbering / bullets / markdown emphasis around the label.
        key = label.strip(" \t*#-.0123456789").lower()
        value = value.strip()
        # "**Label:** value" leaves the closing bold marker on the value side.
        if value.startswith("**"):
            value = value[2:].lstrip()
        # First occurrence wins; empty values are treated as missing.
        if key in _VERSION_LABELS and value and key not in found:
            found[key] = value

    # The original is known locally, so a missing echo is not an error.
    found.setdefault("original", original_text)

    missing = [name for name in _VERSION_LABELS if name not in found]
    if missing:
        raise ValueError(
            "Model response did not contain the expected version(s): "
            + ", ".join(missing)
        )
    return [found[name] for name in _VERSION_LABELS]


def generate_versions(text):
    """Ask the chat model for two emotional rephrasings of *text*.

    Sends a single user prompt to ``gpt-4o-mini`` via the module-level
    ``client``, collects the streamed reply, and parses it.

    Args:
        text: The text to rephrase; it is interpolated into the prompt.

    Returns:
        A list of exactly three strings:
        ``[original, emotional, exaggerated]``.

    Raises:
        ValueError: if the reply lacks the emotional or exaggerated line.
    """
    # NOTE(review): the two example strings in the prompt look mis-encoded
    # (possibly Korean text that went through a wrong charset). They are kept
    # exactly as found -- confirm against the original file before changing.
    prompt = f"""Given the original text: "{text}"
Generate two rephrased versions:
1. A slightly more emotional version (ex. "μνν΄μ" -> "μνν΄μ!!")
2. An exaggerated, highly emotional version (ex. "μνν΄μ" -> "μ κΉλ§μ! μλΌ, μνν΄μ!!")
Output format:
Original: [original text]
Emotional: [emotional version]
Exaggerated: [exaggerated version]"""
    stream = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": prompt}],
        stream=True,
    )

    pieces = []
    for chunk in stream:
        # Guard against a chunk with an empty `choices` list, which would
        # otherwise raise IndexError on `choices[0]`.
        if not chunk.choices:
            continue
        piece = chunk.choices[0].delta.content
        if piece is not None:
            pieces.append(piece)

    return _parse_versions("".join(pieces), text)
def text_to_speech(text):
    """Synthesize speech for *text* and return the response payload.

    Calls the speech endpoint of the module-level ``client`` with the
    ``tts-1`` model and the ``alloy`` voice.

    Args:
        text: The text to be spoken.

    Returns:
        The ``content`` attribute of the speech response
        (presumably the encoded audio bytes -- confirm against the SDK).
    """
    speech_request = {
        "model": "tts-1",
        "voice": "alloy",
        "input": text,
    }
    return client.audio.speech.create(**speech_request).content
def process_and_generate(text):
    """Gradio click handler: produce three text versions plus their audio.

    The UI binds this handler to seven outputs (three textboxes, three audio
    players, one status box), so every return path yields exactly seven
    values in that order.

    Args:
        text: The user's input text; may be empty or ``None``.

    Returns:
        ``[text1, text2, text3, audio1, audio2, audio3, status]``. On blank
        input or an unusable model reply the text slots are ``""``, the audio
        slots are ``None`` and ``status`` explains what went wrong.
    """
    expected_versions = 3

    def _blank_result(message):
        # Empty placeholders keep the output count stable for Gradio.
        return [""] * expected_versions + [None] * expected_versions + [message]

    # Skip the API round-trips entirely when there is nothing to rephrase.
    if not text or not text.strip():
        return _blank_result("Please enter some text first.")

    versions = list(generate_versions(text))
    if len(versions) != expected_versions:
        # Returning a different number of values would make Gradio reject the
        # whole result, so report the problem in the status box instead.
        return _blank_result(
            f"Expected {expected_versions} versions but received "
            f"{len(versions)}. Please try again."
        )

    audio_contents = [text_to_speech(version) for version in versions]
    return versions + audio_contents + ["All versions generated successfully!"]
# UI layout: one input box and a button, a row of three text results, a row of
# three audio players, and a status line. Clicking the button runs
# `process_and_generate` and fills all seven output components in order.
with gr.Blocks(title="Emotional TTS Comparison") as demo:
    gr.Markdown("# Emotional TTS Comparison")
    gr.Markdown("Enter text to generate three versions with varying emotional intensity.")

    input_text = gr.Textbox(label="Original Text", lines=3)
    generate_btn = gr.Button("Generate Versions and Speech")

    # Text results, side by side.
    with gr.Row():
        text1, text2, text3 = (
            gr.Textbox(label="Original Version"),
            gr.Textbox(label="Emotional Version"),
            gr.Textbox(label="Exaggerated Version"),
        )

    # Matching audio players, in the same left-to-right order.
    with gr.Row():
        audio1, audio2, audio3 = (
            gr.Audio(label="Original Speech"),
            gr.Audio(label="Emotional Speech"),
            gr.Audio(label="Exaggerated Speech"),
        )

    status = gr.Textbox(label="Status")

    # Output order must match the list returned by the handler.
    generate_btn.click(
        fn=process_and_generate,
        inputs=[input_text],
        outputs=[text1, text2, text3, audio1, audio2, audio3, status],
    )
if __name__ == "__main__":
    # Executed as a script: let Gradio start its own server.
    demo.launch()
else:
    # Imported as a module: expose the UI as `app`.
    # mount_gradio_app needs three things -- a FastAPI application to mount
    # onto, the Blocks to serve, and the URL path -- so a host application is
    # created here and the demo is mounted at the root path.
    from fastapi import FastAPI  # fastapi is a dependency of gradio

    app = gr.mount_gradio_app(FastAPI(), demo, path="/")