import gradio as gr
import torch
from transformers import AutoProcessor, AutoModel, BitsAndBytesConfig
import scipy.io.wavfile as wav
import numpy as np
import os
import openai

# Set OpenAI API key
openai.api_key = os.getenv("OPENAI_API_KEY")

# Check for GPU availability
device = "cuda" if torch.cuda.is_available() else "cpu"

# Quantization config for speech model
if device == "cuda":
    quantization_config = BitsAndBytesConfig(load_in_8bit=True)
else:
    quantization_config = None

# Load speech model and processor (currently commented out; generate_speech below depends on these)
# speech_processor = AutoProcessor.from_pretrained("suno/bark-small")
# speech_model = AutoModel.from_pretrained(
#     "suno/bark-small",
#     device_map="auto" if device == "cuda" else None,
#     quantization_config=quantization_config
# )
# Move speech model to the appropriate device
# speech_model.to(device)

# Function to generate story using GPT
def generate_story(prompt):
    model_input = f"""You are a creative and educational storyteller for school-going children.
Your task is to create an engaging, age-appropriate story that both entertains and teaches valuable lessons.
Use the following prompt as inspiration for your story, but feel free to be imaginative and expand upon it.
Remember to include educational elements that children can learn from, such as historical facts, scientific concepts, moral lessons, or cultural insights.
[Story Prompt Begin]
{prompt}
[Story Prompt End]
[Instruction Begin]
Generate a short, creative, and educational story based on this prompt. The story should be suitable for school-going children, entertaining, and contain clear learning points.
[Instruction End]
"""
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",  # or "gpt-4" if you have access
        messages=[
            {"role": "system", "content": "You are a skilled storyteller who creates educational and engaging stories for children."},
            {"role": "user", "content": model_input}
        ]
    )
    return response.choices[0].message['content']
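
# Note: openai.ChatCompletion is the pre-1.0 openai SDK interface assumed by this app.
# A rough, untested sketch of the equivalent call with openai>=1.0 (client-based API):
#   from openai import OpenAI
#   client = OpenAI()  # reads OPENAI_API_KEY from the environment
#   response = client.chat.completions.create(model="gpt-3.5-turbo", messages=[...])
#   story = response.choices[0].message.content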

# Function to generate speech with Bark
# NOTE: this requires the speech_processor/speech_model loading above to be uncommented;
# as written, calling it would raise a NameError.
def generate_speech(text, speaker="v2/en_speaker_6"):
    inputs = speech_processor(text, voice_preset=speaker, return_tensors="pt").to(device)
    speech_values = speech_model.generate(**inputs, do_sample=True)
    audio_array = speech_values.cpu().numpy().squeeze()
    sample_rate = speech_model.generation_config.sample_rate
    return (sample_rate, audio_array)
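
# A minimal sketch (not wired into the app) of how the (sample_rate, audio_array) pair
# returned by generate_speech could be saved to disk with the scipy/numpy imports above;
# the function name and default path are illustrative assumptions.
def save_speech_to_wav(sample_rate, audio_array, path="story_speech.wav"):
    # Bark produces float audio roughly in [-1, 1]; convert to 16-bit PCM before writing.
    pcm = np.int16(np.clip(audio_array, -1.0, 1.0) * 32767)
    wav.write(path, sample_rate, pcm)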

# Gradio interface function
def text_to_speech(prompt):
    story = generate_story(prompt)
    # audio = generate_speech(story)  # speech generation disabled while the Bark model is commented out
    return story

# Create Gradio interface
iface = gr.Interface(
    fn=text_to_speech,
    inputs=gr.Textbox(label="Enter story prompt"),
    outputs=[
        gr.Textbox(label="Generated Story"),
        # gr.Audio(label="Generated Speech")
    ],
    title="Story Generator and Text-to-Speech",
    description="Enter a prompt to generate a story using GPT, then convert it to speech."
)

# Launch the app
iface.launch()