Spaces:
Sleeping
Sleeping
import gradio as gr | |
import torch | |
from transformers import AutoProcessor, AutoModel, BitsAndBytesConfig | |
import scipy.io.wavfile as wav | |
import numpy as np | |
import os | |
import openai | |
# Configure the OpenAI client from the environment (None when the variable is unset)
openai.api_key = os.environ.get("OPENAI_API_KEY")

# Run on the GPU whenever torch reports CUDA support, otherwise fall back to CPU
device = "cuda" if torch.cuda.is_available() else "cpu"

# 8-bit loading is only requested on CUDA; CPU runs use no quantization config
quantization_config = (
    BitsAndBytesConfig(load_in_8bit=True) if device == "cuda" else None
)
# Load speech model and processor | |
# speech_processor = AutoProcessor.from_pretrained("suno/bark-small") | |
# speech_model = AutoModel.from_pretrained( | |
# "suno/bark-small", | |
# device_map="auto" if device == "cuda" else None, | |
# quantization_config=quantization_config | |
# ) | |
# Move speech model to the appropriate device | |
# speech_model.to(device) | |
# Function to generate story using GPT | |
def generate_story(prompt):
    """Generate a short educational children's story from ``prompt``.

    The prompt is embedded in a fixed instruction template and sent, together
    with a storyteller system message, to the OpenAI chat-completions API.

    Args:
        prompt: Free-text story idea supplied by the user.

    Returns:
        The text content of the first completion choice.
    """
    model_input = f"""You are a creative and educational storyteller for school-going children.
Your task is to create an engaging, age-appropriate story that both entertains and teaches valuable lessons.
Use the following prompt as inspiration for your story, but feel free to be imaginative and expand upon it.
Remember to include educational elements that children can learn from, such as historical facts, scientific concepts, moral lessons, or cultural insights.
[Story Prompt Begin]
{prompt}
[Story Prompt End]
[Instruction Begin]
Generate a short, creative, and educational story based on this prompt. The story should be suitable for school-going children, entertaining, and contain clear learning points.
[Instruction End]
"""
    model_name = "gpt-3.5-turbo"  # or "gpt-4" if you have access
    messages = [
        {"role": "system", "content": "You are a skilled storyteller who creates educational and engaging stories for children."},
        {"role": "user", "content": model_input},
    ]

    # openai>=1.0 removed ``openai.ChatCompletion`` and returns objects that are
    # not subscriptable. The ``OpenAI`` client class only exists in that newer
    # SDK, so its presence selects the module-level v1 client (which still
    # honours ``openai.api_key``).
    if hasattr(openai, "OpenAI"):
        response = openai.chat.completions.create(
            model=model_name,
            messages=messages,
        )
        return response.choices[0].message.content

    # Legacy (<1.0) SDK: keep the original call path unchanged.
    response = openai.ChatCompletion.create(
        model=model_name,
        messages=messages,
    )
    return response.choices[0].message['content']
# Function to generate speech | |
def generate_speech(text, speaker="v2/en_speaker_6"):
    """Synthesize ``text`` into audio using the module-level speech model.

    Args:
        text: The text to be spoken.
        speaker: Voice preset name forwarded to the processor.

    Returns:
        A ``(sample_rate, audio_array)`` tuple, where ``audio_array`` is the
        generated output moved to the CPU, converted to NumPy and squeezed.

    NOTE(review): relies on the globals ``speech_processor`` and
    ``speech_model``; their loading code is commented out in the visible part
    of this file -- confirm they are defined before calling this function.
    """
    encoded = speech_processor(text, voice_preset=speaker, return_tensors="pt")
    encoded = encoded.to(device)

    # Sampling is enabled explicitly for generation
    generated = speech_model.generate(do_sample=True, **encoded)

    waveform = generated.cpu().numpy().squeeze()
    rate = speech_model.generation_config.sample_rate
    return (rate, waveform)
# Gradio interface function | |
def text_to_speech(prompt):
    """Gradio callback: produce a story for the given prompt.

    Despite the name, only the story text is returned; the speech step
    below is currently disabled.
    """
    # Speech synthesis is switched off for now:
    # audio = generate_speech(story)
    return generate_story(prompt)
# Create Gradio interface | |
# UI components: one text input for the prompt, one text output for the story
_prompt_input = gr.Textbox(label="Enter story prompt")
_story_output = gr.Textbox(label="Generated Story")
# Audio output is disabled together with speech generation:
# gr.Audio(label="Generated Speech")

iface = gr.Interface(
    title="Story Generator and Text-to-Speech",
    description="Enter a prompt to generate a story using GPT, then convert it to speech.",
    fn=text_to_speech,
    inputs=_prompt_input,
    outputs=[_story_output],
)

# Start serving the interface
iface.launch()