PoemGen / app.py
NadaAljohani's picture
Update app.py
6166a91 verified
raw
history blame
6.38 kB
from transformers import pipeline
from datasets import load_dataset
import gradio as gr
import torch
from diffusers import DiffusionPipeline
"""### **Arabic: Text-Generation:**
Generate poetry in Arabic.
"""
# Arabic poem generator: a text-generation pipeline on the PyTorch backend.
# The model and the tokenizer are both loaded from the same checkpoint.
_ARABIC_POEM_CHECKPOINT = 'akhooli/ap2023'
pipe_ar = pipeline(
    'text-generation',
    model=_ARABIC_POEM_CHECKPOINT,
    tokenizer=_ARABIC_POEM_CHECKPOINT,
    framework='pt',
)
"""### **English: Text-Generation:**
Generate poetry in English.
"""
# English poem generator: a text-generation pipeline for the checkpoint below.
_ENGLISH_POEM_CHECKPOINT = "ismaelfaro/gpt2-poems.en"
pipe_en = pipeline("text-generation", model=_ENGLISH_POEM_CHECKPOINT)
"""### **Arabic and English: Text-To-Speech:**
Convert the Arabic/English poetry to speech.
"""
# Text-to-speech setup for both languages. Each language gets its own
# synthesiser pipeline and one fixed speaker embedding, read from a single row
# of an embeddings dataset and given a leading dimension with unsqueeze(0).

# Arabic voice
synthesiser_arabic = pipeline(
    "text-to-speech",
    model="MBZUAI/speecht5_tts_clartts_ar",
)
embeddings_dataset_arabic = load_dataset(
    "herwoww/arabic_xvector_embeddings",
    split="validation",
)
_arabic_speaker_vector = embeddings_dataset_arabic[105]["speaker_embeddings"]
speaker_embedding_arabic = torch.tensor(_arabic_speaker_vector).unsqueeze(0)

# English voice
synthesiser_english = pipeline(
    "text-to-speech",
    model="microsoft/speecht5_tts",
)
embeddings_dataset_english = load_dataset(
    "Matthijs/cmu-arctic-xvectors",
    split="validation",
)
_english_speaker_vector = embeddings_dataset_english[7306]["xvector"]
speaker_embedding_english = torch.tensor(_english_speaker_vector).unsqueeze(0)
"""### **English Text-To-Image:**
Generate an image from the opening sentence of the English poem.
"""
# Text-to-image pipeline, loaded once at import time from the checkpoint below.
_IMAGE_CHECKPOINT = "CompVis/stable-diffusion-v1-4"
pipe_image = DiffusionPipeline.from_pretrained(_IMAGE_CHECKPOINT)
"""### **Translator from Arabic to English:**
The text-to-image model doesn't support Arabic; therefore, the opening sentence of the Arabic poem must be translated to English before an image can be generated from it.
"""
# Arabic -> English translation pipeline, used to build the image prompt.
_TRANSLATION_CHECKPOINT = "Helsinki-NLP/opus-mt-ar-en"
pipe_translator = pipeline("translation", model=_TRANSLATION_CHECKPOINT)
"""### **Primary Function:**
This function receives 2 inputs from the Gradio interface (the selected language and the starting sentence), runs the generation steps below, and returns 3 outputs:
1. The generated poem.
2. The audio.
3. The image.
"""
def generate_poem(selected_language, text):
    """Generate a poem, its spoken audio, and an image for the given prompt.

    Args:
        selected_language: ``"English"`` or ``"Arabic"``, as chosen in the UI.
        text: The starting sentence. It seeds the poem and is also the image
            prompt (translated to English first when the language is Arabic).

    Returns:
        On success, ``(poem, (sampling_rate, audio_data), image)``.
        On any failure, ``("Error: <message>", None, None)`` so the interface
        can show the message in the poem box instead of crashing.
    """
    # Validate up front: without these checks an unselected dropdown (None) or
    # an unknown language fell through both branches and surfaced as a cryptic
    # unbound-variable error.
    if not text or not text.strip():
        return "Error: Please enter a sentence to start the poem.", None, None
    if selected_language not in ("English", "Arabic"):
        return (
            f"Error: Unsupported language {selected_language!r}. "
            "Choose 'English' or 'Arabic'.",
            None,
            None,
        )

    # Broad catch is deliberate: this is the UI boundary, and any model
    # failure should be reported to the user as text.
    try:
        if selected_language == "English":
            poem = generate_poem_english(text)
            sampling_rate, audio_data = text_to_speech_english(poem)
            image_prompt = text
        else:
            poem = generate_poem_arabic(text)
            sampling_rate, audio_data = text_to_speech_arabic(poem)
            # The image model only accepts English prompts.
            image_prompt = translate_arabic_to_english(text)
        image = generate_image_from_poem(image_prompt)
        return poem, (sampling_rate, audio_data), image
    except Exception as e:
        return f"Error: {str(e)}", None, None
"""### **Poem Generation Function:**
This function is responsible for generating a poem (text) in Arabic or English, based on the provided text.
"""
# Poem generation for Arabic
def generate_poem_arabic(text):
    """Generate an Arabic poem that continues ``text``.

    Calls the module-level ``pipe_ar`` pipeline with the generation options
    below and returns the first generated sequence with every ``-`` removed.
    """
    generation_options = {
        "max_length": 96,
        "min_length": 64,
        "do_sample": True,
        "temperature": 1.0,
        "top_k": 50,
        "top_p": 0.9,
        "repetition_penalty": 1.2,
        "no_repeat_ngram_size": 3,
        "num_beams": 5,
        "num_return_sequences": 1,
        "return_full_text": True,
    }
    candidates = pipe_ar(text, **generation_options)
    raw_poem = candidates[0]["generated_text"]
    # The model produces dashes in its output; strip them from the poem.
    return raw_poem.replace("-", "")
# Poem generation for English
def generate_poem_english(text):
    """Generate an English poem that continues ``text``.

    Calls the module-level ``pipe_en`` pipeline with sampling enabled and
    returns the first generated sequence with every ``</s>`` marker removed.
    """
    sampling_options = {
        "do_sample": True,
        "max_length": 100,
        "top_k": 50,
        "top_p": 0.9,
        "temperature": 1.0,
        "num_return_sequences": 1,
    }
    candidates = pipe_en(text, **sampling_options)
    raw_poem = candidates[0]['generated_text']
    # The model can emit literal "</s>" markers; remove them from the poem.
    return raw_poem.replace("</s>", "")
"""### **Audio Function:**
This function is responsible for generating audio in Arabic or English, based on the provided text.
"""
def text_to_speech_arabic(text):
    """Synthesise Arabic speech for ``text``.

    Args:
        text: The Arabic text to read aloud.

    Returns:
        ``(sampling_rate, audio_data)`` taken from the ``"sampling_rate"`` and
        ``"audio"`` entries of the pipeline output.
    """
    # Model inputs such as the speaker embedding must go through
    # ``forward_params``; the text-to-speech pipeline does not accept
    # ``speaker_embeddings`` as a direct keyword argument.
    speech = synthesiser_arabic(
        text,
        forward_params={"speaker_embeddings": speaker_embedding_arabic},
    )
    audio_data = speech["audio"]
    sampling_rate = speech["sampling_rate"]
    return (sampling_rate, audio_data)
def text_to_speech_english(text):
    """Synthesise English speech for ``text``.

    Args:
        text: The English text to read aloud.

    Returns:
        ``(sampling_rate, audio_data)`` taken from the ``"sampling_rate"`` and
        ``"audio"`` entries of the pipeline output.
    """
    # Model inputs such as the speaker embedding must go through
    # ``forward_params``; the text-to-speech pipeline does not accept
    # ``speaker_embeddings`` as a direct keyword argument.
    speech = synthesiser_english(
        text,
        forward_params={"speaker_embeddings": speaker_embedding_english},
    )
    audio_data = speech["audio"]
    sampling_rate = speech["sampling_rate"]
    return (sampling_rate, audio_data)
"""### **Image Function:**
This function is responsible for generating an image based on the provided text.
"""
def generate_image_from_poem(poem_text):
    """Return the first image the ``pipe_image`` pipeline produces for ``poem_text``."""
    pipeline_output = pipe_image(poem_text)
    return pipeline_output.images[0]
"""### **Translation Function:**
This function is responsible for translating Arabic input to English, to be used for the image function, which accepts only English inputs.
"""
def translate_arabic_to_english(text):
    """Translate ``text`` with ``pipe_translator`` and return the first result's text."""
    first_result = pipe_translator(text)[0]
    return first_result['translation_text']
"""### **CSS Styling:**"""
# Stylesheet for the app, handed to gr.Interface through its `css` argument.
# It sets the page and container colours and restyles labels, text inputs,
# buttons (including hover) and the `.dropdown` class around the #4A90E2 blue.
custom_css = """
body {
background-color: #f4f4f9;
color: #333;
}
.gradio-container {
border-radius: 10px;
box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
background-color: #fff;
}
label {
color: #4A90E2;
font-weight: bold;
}
input[type="text"],
textarea {
border: 1px solid #4A90E2;
}
textarea {
height: 150px;
}
button {
background-color: #4A90E2;
color: #fff;
border-radius: 5px;
cursor: pointer;
}
button:hover {
background-color: #357ABD;
}
.dropdown {
border: 1px solid #4A90E2;
border-radius: 4px;
}
"""
"""### **Examples for Gradio:**
Provide 4 predefined inputs to demonstrate how the interface works.
"""
# Example rows offered in the UI. Each row is [language, sentence], in the same
# order as the interface inputs (language dropdown, then sentence textbox).
examples = [
["English", "The shining sun rises over the calm ocean"],
["Arabic", "الورود تتفتح في الربيع"],
["English", "The night sky is filled with stars and dreams"],
["Arabic", "أشعة الشمس المشرقة"]
]
"""### **Gradio Interface:**
Creating a Gradio interface to generate a poem, read the poem, and generate an image based on that poem.
"""
# Input widgets: the language choice and the sentence that starts the poem.
_language_dropdown = gr.Dropdown(["English", "Arabic"], label="Select Language")
_sentence_box = gr.Textbox(label="Enter a sentence")

# Output widgets, in the order generate_poem returns its values:
# poem text, audio as a (sampling_rate, data) pair, and the image.
_poem_box = gr.Textbox(label="Generated Poem", lines=10)
_audio_player = gr.Audio(label="Generated Audio", type="numpy")
_image_view = gr.Image(label="Generated Image")

# Wire the widgets to generate_poem, with the predefined examples and styling.
my_model = gr.Interface(
    fn=generate_poem,
    inputs=[_language_dropdown, _sentence_box],
    outputs=[_poem_box, _audio_player, _image_view],
    examples=examples,
    css=custom_css,
)

# Launch the app.
my_model.launch()