Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
1littlecoder
committed on
Commit
•
f16d803
1
Parent(s):
4d0dc05
Update app.py
Browse files
app.py
CHANGED
@@ -1,7 +1,15 @@
|
|
1 |
import os
|
|
|
|
|
2 |
import google.generativeai as genai
|
3 |
import gradio as gr
|
4 |
import requests
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
# Configure Google Gemini API
|
7 |
genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
|
@@ -10,20 +18,11 @@ genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
|
|
10 |
API_KEY = os.getenv('PLAY_API_KEY')
|
11 |
USER_ID = os.getenv('PLAY_USER_ID')
|
12 |
|
13 |
-
# theme selection let's go with this before the branded color
|
14 |
-
#theme={"primary_hue": "#b4fd83"}
|
15 |
-
theme = gr.themes.Base(
|
16 |
-
primary_hue="emerald",
|
17 |
-
)
|
18 |
-
|
19 |
-
|
20 |
-
# Function to upload image to Gemini and get roasted text
|
21 |
def upload_to_gemini(path, mime_type="image/jpeg"):
|
22 |
file = genai.upload_file(path, mime_type=mime_type)
|
23 |
return file
|
24 |
|
25 |
def generate_roast(image_path):
|
26 |
-
# Upload the image to Gemini and get the text
|
27 |
uploaded_file = upload_to_gemini(image_path)
|
28 |
generation_config = {
|
29 |
"temperature": 1,
|
@@ -35,16 +34,12 @@ def generate_roast(image_path):
|
|
35 |
model = genai.GenerativeModel(
|
36 |
model_name="gemini-1.5-flash-002",
|
37 |
generation_config=generation_config,
|
38 |
-
system_instruction="You are a professional satirist and fashion expert.
|
39 |
-
)
|
40 |
-
|
41 |
-
chat_session = model.start_chat(
|
42 |
-
history=[{"role": "user", "parts": [uploaded_file]}]
|
43 |
)
|
|
|
44 |
response = chat_session.send_message("Roast this image!")
|
45 |
return response.text
|
46 |
|
47 |
-
# Function to convert text to speech with Play.ht
|
48 |
def text_to_speech(text):
|
49 |
url = "https://api.play.ht/api/v2/tts/stream"
|
50 |
payload = {
|
@@ -58,7 +53,6 @@ def text_to_speech(text):
|
|
58 |
"Authorization": API_KEY,
|
59 |
"X-User-ID": USER_ID
|
60 |
}
|
61 |
-
|
62 |
response = requests.post(url, json=payload, headers=headers)
|
63 |
if response.status_code == 200:
|
64 |
audio_path = "output_audio.mp3"
|
@@ -66,27 +60,31 @@ def text_to_speech(text):
|
|
66 |
audio_file.write(response.content)
|
67 |
return audio_path
|
68 |
else:
|
69 |
-
|
|
|
|
|
|
|
|
|
|
|
70 |
|
71 |
-
# Gradio
|
72 |
-
|
73 |
-
|
74 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
75 |
|
76 |
with gr.Row():
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
output_text = gr.Textbox(label="Roast Text")
|
81 |
-
audio_output = gr.Audio(label="Roast Audio")
|
82 |
-
|
83 |
-
def process_image(image):
|
84 |
-
roast_text = generate_roast(image)
|
85 |
-
audio_path = text_to_speech(roast_text)
|
86 |
-
return roast_text, audio_path
|
87 |
|
88 |
-
submit_button = gr.Button("Generate Roast")
|
89 |
-
submit_button.click(process_image, inputs=image_input, outputs=[output_text,
|
90 |
|
91 |
# Launch the app
|
92 |
demo.launch(debug=True)
|
|
|
1 |
import os
|
2 |
+
import tempfile
|
3 |
+
import shutil
|
4 |
import google.generativeai as genai
|
5 |
import gradio as gr
|
6 |
import requests
|
7 |
+
import numpy as np
|
8 |
+
import subprocess
|
9 |
+
import matplotlib.pyplot as plt
|
10 |
+
from matplotlib.animation import FuncAnimation
|
11 |
+
import PIL.Image
|
12 |
+
from gradio import processing_utils, utils
|
13 |
|
14 |
# Configure Google Gemini API
|
15 |
genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
|
|
|
18 |
API_KEY = os.getenv('PLAY_API_KEY')
|
19 |
USER_ID = os.getenv('PLAY_USER_ID')
|
20 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
def upload_to_gemini(path, mime_type="image/jpeg"):
    """Upload the file at ``path`` through ``genai.upload_file``.

    Args:
        path: Location of the file to upload.
        mime_type: MIME type forwarded to the upload call; defaults to
            ``"image/jpeg"``.

    Returns:
        The object returned by ``genai.upload_file`` for the uploaded file.
    """
    return genai.upload_file(path, mime_type=mime_type)
|
24 |
|
25 |
def generate_roast(image_path):
|
|
|
26 |
uploaded_file = upload_to_gemini(image_path)
|
27 |
generation_config = {
|
28 |
"temperature": 1,
|
|
|
34 |
model = genai.GenerativeModel(
|
35 |
model_name="gemini-1.5-flash-002",
|
36 |
generation_config=generation_config,
|
37 |
+
system_instruction="You are a professional satirist and fashion expert. Roast the profile picture.",
|
|
|
|
|
|
|
|
|
38 |
)
|
39 |
+
chat_session = model.start_chat(history=[{"role": "user", "parts": [uploaded_file]}])
|
40 |
response = chat_session.send_message("Roast this image!")
|
41 |
return response.text
|
42 |
|
|
|
43 |
def text_to_speech(text):
|
44 |
url = "https://api.play.ht/api/v2/tts/stream"
|
45 |
payload = {
|
|
|
53 |
"Authorization": API_KEY,
|
54 |
"X-User-ID": USER_ID
|
55 |
}
|
|
|
56 |
response = requests.post(url, json=payload, headers=headers)
|
57 |
if response.status_code == 200:
|
58 |
audio_path = "output_audio.mp3"
|
|
|
60 |
audio_file.write(response.content)
|
61 |
return audio_path
|
62 |
else:
|
63 |
+
raise ValueError(f"Error: {response.status_code} - {response.text}")
|
64 |
+
|
65 |
+
# Generate waveform and overlay with image
|
66 |
+
def make_waveform_overlay(audio_path, image_path):
    """Render a waveform video for the audio, using the image as background.

    Args:
        audio_path: Path to the audio file whose waveform is drawn.
        image_path: Path to the image passed as ``bg_image``.

    Returns:
        The value returned by ``gr.make_waveform`` (the path of the
        generated video file).
    """
    # The bare name ``make_waveform`` is not defined or imported in the
    # visible file, so calling it would raise NameError. Gradio exposes the
    # helper with this keyword signature as ``gr.make_waveform``.
    # NOTE(review): ``gr.make_waveform`` is deprecated in recent Gradio 4.x
    # releases -- confirm the pinned Gradio version still provides it.
    return gr.make_waveform(audio_path, bg_image=image_path, animate=True)
|
69 |
|
70 |
+
# Full pipeline behind the Gradio button: roast text -> speech audio -> waveform video
|
71 |
+
def process_image(image):
    """Run the roast pipeline for one image.

    The image is passed to ``generate_roast`` to obtain the roast text, the
    text is passed to ``text_to_speech``, and the resulting audio together
    with the same image is passed to ``make_waveform_overlay``.

    Args:
        image: The image to roast; forwarded unchanged to ``generate_roast``
            and ``make_waveform_overlay``.

    Returns:
        A ``(roast_text, video_path)`` tuple.
    """
    roast = generate_roast(image)
    video = make_waveform_overlay(text_to_speech(roast), image)
    return roast, video
|
76 |
+
|
77 |
+
# Gradio Blocks UI
|
78 |
+
with gr.Blocks() as demo:
|
79 |
+
gr.Markdown("# Image Roast and Waveform Video Generator")
|
80 |
|
81 |
with gr.Row():
|
82 |
+
image_input = gr.Image(type="filepath", label="Upload Image")
|
83 |
+
output_text = gr.Textbox(label="Roast Text")
|
84 |
+
output_video = gr.Video(label="Roast Waveform Video")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
85 |
|
86 |
+
submit_button = gr.Button("Generate Roast Video")
|
87 |
+
submit_button.click(process_image, inputs=image_input, outputs=[output_text, output_video])
|
88 |
|
89 |
# Launch the app
|
90 |
demo.launch(debug=True)
|