Spaces:
Sleeping
Sleeping
NadaAljohani
commited on
Commit
•
f8f62a8
1
Parent(s):
2cd8ce8
Update app.py
Browse files
app.py
CHANGED
@@ -4,143 +4,72 @@ import gradio as gr
|
|
4 |
import torch
|
5 |
from diffusers import DiffusionPipeline
|
6 |
|
7 |
-
"""### **Arabic: Text-Generation:**
|
8 |
-
Generate a poetry in Arabic.
|
9 |
-
"""
|
10 |
-
|
11 |
pipe_ar = pipeline('text-generation', framework='pt', model='akhooli/ap2023', tokenizer='akhooli/ap2023')
|
12 |
-
|
13 |
-
"""### **English: Text-Generation:**
|
14 |
-
Generate a poetry in English.
|
15 |
-
"""
|
16 |
-
|
17 |
pipe_en = pipeline("text-generation", model="ismaelfaro/gpt2-poems.en")
|
18 |
-
|
19 |
-
"""
|
20 |
-
Convert the Arabic/English poetry to speech.
|
21 |
-
"""
|
22 |
|
23 |
# Initialize text-to-speech models for Arabic and English
|
24 |
# Arabic: text-to-speech
|
25 |
-
synthesiser_arabic = pipeline("text-to-speech", model="
|
26 |
-
embeddings_dataset_arabic = load_dataset("herwoww/arabic_xvector_embeddings", split="validation")
|
27 |
-
speaker_embedding_arabic = torch.tensor(embeddings_dataset_arabic[105]["speaker_embeddings"]).unsqueeze(0)
|
28 |
|
29 |
# English: text-to-speech
|
30 |
synthesiser_english = pipeline("text-to-speech", model="microsoft/speecht5_tts")
|
31 |
embeddings_dataset_english = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
|
32 |
speaker_embedding_english = torch.tensor(embeddings_dataset_english[7306]["xvector"]).unsqueeze(0)
|
33 |
|
34 |
-
|
35 |
-
Convert the starter of the English poetry to an image.
|
36 |
-
"""
|
37 |
-
|
38 |
-
pipe_image = DiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4")
|
39 |
-
|
40 |
-
"""### **Translator from Arabic to English:**
|
41 |
-
The text-to-image model doesn't support Arabic, therefore we need to translate the starter of the Arabic poetry to English in order to generate image.
|
42 |
-
"""
|
43 |
-
|
44 |
-
pipe_translator = pipeline("translation", model="Helsinki-NLP/opus-mt-ar-en")
|
45 |
-
|
46 |
-
"""### **Primary Function:**
|
47 |
-
This function will receive 2 inputs from the Gradio interface, and execute the following functions and return 3 outputs:
|
48 |
-
1. The generated poem.
|
49 |
-
2. The audio.
|
50 |
-
3. The image.
|
51 |
-
"""
|
52 |
-
|
53 |
def generate_poem(selected_language, text):
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
return poem, (sampling_rate, audio_data), image
|
66 |
-
except Exception as e:
|
67 |
-
return f"Error: {str(e)}", None, None
|
68 |
-
|
69 |
-
"""### **Poem Generation Function:**
|
70 |
-
This function is responsible for generating a poem (text) in Arabic or English, based on the provided text.
|
71 |
-
"""
|
72 |
|
73 |
# Poem generation for Arabic
|
74 |
def generate_poem_arabic(text):
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
penalty = 1.2
|
79 |
-
generated_text = pipe_ar(
|
80 |
-
text,
|
81 |
-
max_length=96,
|
82 |
-
do_sample=True,
|
83 |
-
temperature=temp,
|
84 |
-
top_k=topk,
|
85 |
-
top_p=topp,
|
86 |
-
repetition_penalty=penalty,
|
87 |
-
min_length=64,
|
88 |
-
no_repeat_ngram_size=3,
|
89 |
-
return_full_text=True,
|
90 |
-
num_beams=5,
|
91 |
-
num_return_sequences=1
|
92 |
-
)[0]["generated_text"]
|
93 |
-
clean_text = generated_text.replace("-", "") # To get rid of the dashes generated by the model.
|
94 |
return clean_text
|
95 |
|
96 |
# Poem generation for English
|
97 |
def generate_poem_english(text):
|
98 |
-
generated_text = pipe_en(
|
99 |
-
|
100 |
-
|
101 |
-
max_length=100,
|
102 |
-
top_k=50,
|
103 |
-
top_p=0.9,
|
104 |
-
temperature=1.0,
|
105 |
-
num_return_sequences=1
|
106 |
-
)[0]['generated_text']
|
107 |
-
clean_text = generated_text.replace("</s>", "") # To get rid of the </s> generated by the model.
|
108 |
return clean_text
|
109 |
|
110 |
-
|
111 |
-
This function is responsible for generating audio in Arabic or English, based on the provided text.
|
112 |
-
"""
|
113 |
-
|
114 |
def text_to_speech_arabic(text):
|
115 |
-
speech = synthesiser_arabic(text
|
116 |
-
audio_data = speech["audio"]
|
117 |
sampling_rate = speech["sampling_rate"]
|
118 |
return (sampling_rate, audio_data)
|
119 |
|
|
|
120 |
def text_to_speech_english(text):
|
121 |
-
speech = synthesiser_english(text, speaker_embeddings
|
122 |
audio_data = speech["audio"]
|
123 |
sampling_rate = speech["sampling_rate"]
|
124 |
return (sampling_rate, audio_data)
|
125 |
|
126 |
-
|
127 |
-
This function is responsible for generating an image based on the provided text.
|
128 |
-
"""
|
129 |
-
|
130 |
def generate_image_from_poem(poem_text):
|
131 |
image = pipe_image(poem_text).images[0]
|
132 |
return image
|
133 |
|
134 |
-
|
135 |
-
This function is responsible for translating Arabic input to English, to be used for the image function, which accepts only English inputs.
|
136 |
-
"""
|
137 |
-
|
138 |
def translate_arabic_to_english(text):
|
139 |
translated_text = pipe_translator(text)[0]['translation_text']
|
140 |
return translated_text
|
141 |
|
142 |
-
"""### **CSS Styling:**"""
|
143 |
-
|
144 |
custom_css = """
|
145 |
body {
|
146 |
background-color: #f4f4f9;
|
@@ -155,7 +84,6 @@ label {
|
|
155 |
color: #4A90E2;
|
156 |
font-weight: bold;
|
157 |
}
|
158 |
-
|
159 |
input[type="text"],
|
160 |
textarea {
|
161 |
border: 1px solid #4A90E2;
|
@@ -163,7 +91,6 @@ textarea {
|
|
163 |
textarea {
|
164 |
height: 150px;
|
165 |
}
|
166 |
-
|
167 |
button {
|
168 |
background-color: #4A90E2;
|
169 |
color: #fff;
|
@@ -173,40 +100,30 @@ button {
|
|
173 |
button:hover {
|
174 |
background-color: #357ABD;
|
175 |
}
|
176 |
-
|
177 |
.dropdown {
|
178 |
border: 1px solid #4A90E2;
|
179 |
border-radius: 4px;
|
180 |
}
|
181 |
"""
|
|
|
|
|
182 |
|
183 |
-
"""### **Examples for Gradio:**
|
184 |
-
Provide 4 predefined inputs to demonstrate how the interface works.
|
185 |
-
"""
|
186 |
-
|
187 |
-
examples = [
|
188 |
-
["English", "The shining sun rises over the calm ocean"],
|
189 |
-
["Arabic", "الورود تتفتح في الربيع"],
|
190 |
-
["English", "The night sky is filled with stars and dreams"],
|
191 |
-
["Arabic", "أشعة الشمس المشرقة"]
|
192 |
-
]
|
193 |
-
|
194 |
-
"""### **Gradio Interface:**
|
195 |
-
Creating a Gradio interface to generate a poem, read the poem, and generate an image based on that poem.
|
196 |
-
"""
|
197 |
|
198 |
my_model = gr.Interface(
|
199 |
-
fn=generate_poem,
|
200 |
inputs=[
|
201 |
-
gr.Dropdown(["English", "Arabic"], label="Select Language"),
|
202 |
-
gr.Textbox(label="Enter a sentence")
|
203 |
-
|
204 |
outputs=[
|
205 |
-
gr.Textbox(label="Generated Poem", lines=10),
|
206 |
-
gr.Audio(label="Generated Audio", type="numpy"),
|
207 |
-
gr.Image(label="Generated Image")
|
208 |
-
|
209 |
-
examples=examples,
|
210 |
-
css=custom_css
|
211 |
)
|
212 |
my_model.launch()
|
|
|
|
|
|
|
|
4 |
import torch
|
5 |
from diffusers import DiffusionPipeline
|
6 |
|
|
|
|
|
|
|
|
|
7 |
# Text-generation models: one poem generator per supported language.
pipe_ar = pipeline(
    'text-generation',
    framework='pt',
    model='akhooli/ap2023',
    tokenizer='akhooli/ap2023',
)
pipe_en = pipeline("text-generation", model="ismaelfaro/gpt2-poems.en")

# Text-to-image model used to illustrate the poem.
pipe_image = DiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")

# Arabic -> English translator; the Arabic branch of generate_poem feeds its
# output to the image model.
pipe_translator = pipeline("translation", model="Helsinki-NLP/opus-mt-ar-en")

# Text-to-speech models for Arabic and English.
# Arabic: text-to-speech
synthesiser_arabic = pipeline("text-to-speech", model="facebook/mms-tts-ara")

# English: text-to-speech (called with a speaker embedding, loaded below)
synthesiser_english = pipeline("text-to-speech", model="microsoft/speecht5_tts")
embeddings_dataset_english = load_dataset(
    "Matthijs/cmu-arctic-xvectors",
    split="validation",
)
# Row 7306 supplies the voice; unsqueeze(0) adds a leading dimension.
speaker_embedding_english = torch.tensor(
    embeddings_dataset_english[7306]["xvector"]
).unsqueeze(0)
|
20 |
|
21 |
+
# Generate poem based on language and convert it to audio and image
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
def generate_poem(selected_language, text):
    """Generate a poem, then its spoken audio and an illustrating image.

    Args:
        selected_language: Either "English" or "Arabic" (the dropdown value).
        text: The starter sentence the poem is generated from.

    Returns:
        A 3-tuple ``(poem, (sampling_rate, audio_data), image)`` matching the
        three Gradio outputs (text, audio, image).

    Raises:
        ValueError: If ``selected_language`` is not one of the supported
            values (for example ``None`` when nothing was selected).
            Previously this case fell through both branches and failed with
            an UnboundLocalError on the return statement.
    """
    # Validate first so no model is invoked for an unsupported selection.
    if selected_language not in ("English", "Arabic"):
        raise ValueError(
            f"Unsupported language: {selected_language!r}. "
            "Choose 'English' or 'Arabic'."
        )

    if selected_language == "English":
        poem = generate_poem_english(text)
        sampling_rate, audio_data = text_to_speech_english(poem)
        image = generate_image_from_poem(poem)
    else:  # "Arabic"
        poem = generate_poem_arabic(text)
        sampling_rate, audio_data = text_to_speech_arabic(poem)
        # The Arabic poem is translated to English before it is used as the
        # image prompt.
        translated_text = translate_arabic_to_english(poem)
        image = generate_image_from_poem(translated_text)

    return poem, (sampling_rate, audio_data), image
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
|
35 |
# Poem generation for Arabic
|
36 |
def generate_poem_arabic(text):
    """Generate an Arabic poem from the starter ``text``.

    Calls the Arabic text-generation pipeline with sampling enabled and
    returns the first generated sequence with every "-" removed.
    """
    generation_options = {
        "do_sample": True,
        "max_length": 96,
        "top_k": 50,
        "top_p": 1.0,
        "temperature": 1.0,
        "num_return_sequences": 1,
        "no_repeat_ngram_size": 3,
        "return_full_text": True,
    }
    candidates = pipe_ar(text, **generation_options)
    raw_poem = candidates[0]["generated_text"]
    # Get rid of the dashes generated by the model.
    return raw_poem.replace("-", "")
|
41 |
|
42 |
# Poem generation for English
|
43 |
def generate_poem_english(text):
    """Generate an English poem from the starter ``text``.

    Calls the English text-generation pipeline with sampling enabled
    (``max_length=50``) and returns the first generated sequence after
    clean-up: dashes are removed and newlines are replaced with spaces.
    """
    generated_text = pipe_en(text, do_sample=True, max_length=50)[0]['generated_text']
    clean_text = generated_text.replace("-", "")  # Remove dashes generated by the model
    # Replace newlines with a space. The previous code only replaced "\\n",
    # i.e. a literal backslash followed by "n", so real line breaks were left
    # in place. The literal form is still handled for backward compatibility.
    clean_text = clean_text.replace("\\n", " ").replace("\n", " ")
    return clean_text
|
48 |
|
49 |
+
# Text-to-speech conversion for Arabic
|
|
|
|
|
|
|
50 |
def text_to_speech_arabic(text):
    """Synthesise Arabic speech for ``text``.

    Returns a ``(sampling_rate, audio_data)`` tuple, where ``audio_data`` is
    the first element of the pipeline's "audio" output.
    """
    result = synthesiser_arabic(text)
    waveform = result["audio"][0]  # Flatten to 1D
    return (result["sampling_rate"], waveform)
|
55 |
|
56 |
+
# Text-to-speech conversion for English
|
57 |
def text_to_speech_english(text):
    """Synthesise English speech for ``text`` using the preloaded speaker embedding.

    Returns a ``(sampling_rate, audio_data)`` tuple taken from the pipeline
    output.
    """
    voice_settings = {"speaker_embeddings": speaker_embedding_english}
    result = synthesiser_english(text, forward_params=voice_settings)
    waveform = result["audio"]
    return (result["sampling_rate"], waveform)
|
62 |
|
63 |
+
#Image Function
|
|
|
|
|
|
|
64 |
def generate_image_from_poem(poem_text):
    """Run the diffusion pipeline on ``poem_text`` and return its first image."""
    generated = pipe_image(poem_text)
    return generated.images[0]
|
67 |
|
68 |
+
#Translation Function from Arabic to English
|
|
|
|
|
|
|
69 |
def translate_arabic_to_english(text):
    """Translate ``text`` with the Arabic-to-English pipeline and return the translated string."""
    first_result = pipe_translator(text)[0]
    return first_result['translation_text']
|
72 |
|
|
|
|
|
73 |
custom_css = """
|
74 |
body {
|
75 |
background-color: #f4f4f9;
|
|
|
84 |
color: #4A90E2;
|
85 |
font-weight: bold;
|
86 |
}
|
|
|
87 |
input[type="text"],
|
88 |
textarea {
|
89 |
border: 1px solid #4A90E2;
|
|
|
91 |
textarea {
|
92 |
height: 150px;
|
93 |
}
|
|
|
94 |
button {
|
95 |
background-color: #4A90E2;
|
96 |
color: #fff;
|
|
|
100 |
button:hover {
|
101 |
background-color: #357ABD;
|
102 |
}
|
|
|
103 |
.dropdown {
|
104 |
border: 1px solid #4A90E2;
|
105 |
border-radius: 4px;
|
106 |
}
|
107 |
"""
|
108 |
+
# Each example is [language, starter]: the first item fills the dropdown and
# the second is the starter of the poem.
examples = [["Arabic", "الورود تتفتح في فصل الربيع"]]

# Gradio interface: takes a language and a starter sentence, and shows the
# generated poem, its audio and an image.
my_model = gr.Interface(
    fn=generate_poem,  # Primary function that receives the inputs (language and poem starter)
    inputs=[
        # Dropdown to select the poem language, either "English" or "Arabic"
        gr.Dropdown(["English", "Arabic"], label="Select Language"),
        # Textbox where the user enters the sentence that starts the poem
        gr.Textbox(label="Enter a sentence"),
    ],
    outputs=[
        gr.Textbox(label="Generated Poem", lines=10),  # Displays the generated poem
        gr.Audio(label="Generated Audio", type="numpy"),  # Audio of the generated poem
        gr.Image(label="Generated Image"),  # Image generated for the poem
    ],
    examples=examples,  # Predefined examples showing how to use the interface
    css=custom_css,  # Apply the custom CSS
)

if __name__ == "__main__":
    # The previous code called my_model.launch() unconditionally and then
    # app.run(host="0.0.0.0", port=7860). `app` is not defined in this file,
    # so that call could only raise NameError, and the intended host/port
    # were never applied. Launch once with those settings instead.
    # NOTE(review): the server now starts only when run as a script, not on
    # import -- confirm nothing imports this module expecting a launch.
    my_model.launch(server_name="0.0.0.0", server_port=7860)
|