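"""Emotional TTS Comparison: a Gradio app that rephrases input text at two
levels of emotional intensity (via gpt-4o-mini) and synthesizes speech for
each version with OpenAI TTS."""
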
import os
import tempfile

import gradio as gr
from dotenv import load_dotenv
from openai import OpenAI
# Load environment variables from .env file
load_dotenv()

# Fail fast if the key is missing; checking client.api_key after construction
# is too late, since OpenAI() raises its own (less helpful) error first.
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    raise ValueError("Please set the OPENAI_API_KEY in your .env file")

# Initialize the OpenAI client
client = OpenAI(api_key=api_key)
def generate_versions(text):
    """Ask the model for two rephrasings of `text` with rising emotional intensity."""
    prompt = f"""Given the original text: "{text}"
Generate two rephrased versions:
1. A slightly more emotional version (e.g. "μœ„ν—˜ν•΄μš”" -> "μœ„ν—˜ν•΄μš”!!")
2. An exaggerated, highly emotional version (e.g. "μœ„ν—˜ν•΄μš”" -> "μž κΉλ§Œμš”! μ•ˆλΌ, μœ„ν—˜ν•΄μš”!!")
Output format:
Original: [original text]
Emotional: [emotional version]
Exaggerated: [exaggerated version]"""
    stream = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": prompt}],
        stream=True,
    )
    # Accumulate the streamed chunks into one string
    full_response = ""
    for chunk in stream:
        if chunk.choices[0].delta.content is not None:
            full_response += chunk.choices[0].delta.content
    # Parse the "Label: text" lines into a list of the three versions
    versions = full_response.split('\n')
    return [v.split(': ', 1)[1] for v in versions if ': ' in v]
def text_to_speech(text):
    """Synthesize `text` with OpenAI TTS and return a file path gr.Audio can play."""
    response = client.audio.speech.create(
        model="tts-1",
        voice="alloy",
        input=text,
    )
    # gr.Audio expects a filepath (or a sample-rate/array tuple), not raw
    # bytes, so write the returned MP3 bytes to a temporary file.
    tmp = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False)
    tmp.write(response.content)
    tmp.close()
    return tmp.name
def process_and_generate(text):
    """Generate the three text versions and their audio for the UI outputs."""
    versions = generate_versions(text)
    if len(versions) != 3:
        # The click handler binds exactly seven outputs, so guard the count.
        raise gr.Error("Expected three versions from the model; please try again.")
    audio_paths = [text_to_speech(v) for v in versions]
    return versions + audio_paths + ["All versions generated successfully!"]
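# --- Gradio UI: input box, generate button, three text/audio pairs, status ---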
with gr.Blocks(title="Emotional TTS Comparison") as demo:
    gr.Markdown("# Emotional TTS Comparison")
    gr.Markdown("Enter text to generate three versions with varying emotional intensity.")

    input_text = gr.Textbox(label="Original Text", lines=3)
    generate_btn = gr.Button("Generate Versions and Speech")

    with gr.Row():
        text1 = gr.Textbox(label="Original Version")
        text2 = gr.Textbox(label="Emotional Version")
        text3 = gr.Textbox(label="Exaggerated Version")

    with gr.Row():
        audio1 = gr.Audio(label="Original Speech")
        audio2 = gr.Audio(label="Emotional Speech")
        audio3 = gr.Audio(label="Exaggerated Speech")

    status = gr.Textbox(label="Status")

    generate_btn.click(
        process_and_generate,
        inputs=[input_text],
        outputs=[text1, text2, text3, audio1, audio2, audio3, status],
    )
if __name__ == "__main__":
    demo.launch()
else:
    # When imported by an ASGI server (e.g. the Vercel deployment), mount the
    # Gradio app on a FastAPI instance; mount_gradio_app needs one as its
    # first argument.
    from fastapi import FastAPI

    app = gr.mount_gradio_app(FastAPI(), demo, path="/")
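# Run locally with `python app.py`, or serve via an ASGI server, e.g.
# `uvicorn app:app --host 0.0.0.0 --port 7860` (assuming this file is app.py).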