Spaces:

PlayHT
/

roast_your_pic

Running on CPU Upgrade

App Files Files Community

1littlecoder committed 21 days ago

Commit

d652179

•

1 Parent(s): dec1a60

Update app.py

Browse files

Files changed (1) hide show

app.py +18 -8

app.py CHANGED Viewed

@@ -2,6 +2,7 @@ import os
 import google.generativeai as genai
 import gradio as gr
 import requests
 # Configure Google Gemini API
 genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
@@ -10,13 +11,11 @@ genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
 API_KEY = os.getenv('PLAY_API_KEY')
 USER_ID = os.getenv('PLAY_USER_ID')
-# theme selection let's go with this before the branded color
-#theme={"primary_hue": "#b4fd83"}
 theme = gr.themes.Base(
     primary_hue="emerald",
 )
 # Function to upload image to Gemini and get roasted text
 def upload_to_gemini(path, mime_type="image/jpeg"):
     file = genai.upload_file(path, mime_type=mime_type)
@@ -68,10 +67,19 @@ def text_to_speech(text):
     else:
         return f"Error: {response.status_code} - {response.text}"
 # Gradio Interface
-with gr.Blocks(theme = theme) as demo:
     gr.Markdown("# Image to Text-to-Speech Roasting App")
-    gr.Markdown("Upload an image, and the AI will roast it and convert the roast to audio.")
     with gr.Row():
         with gr.Column():
@@ -79,14 +87,16 @@ with gr.Blocks(theme = theme) as demo:
         with gr.Column():
             output_text = gr.Textbox(label="Roast Text")
             audio_output = gr.Audio(label="Roast Audio")
     def process_image(image):
         roast_text = generate_roast(image)
         audio_path = text_to_speech(roast_text)
-        return roast_text, audio_path
     submit_button = gr.Button("Generate Roast")
-    submit_button.click(process_image, inputs=image_input, outputs=[output_text, audio_output])
 # Launch the app
-demo.launch(debug=True)

 import google.generativeai as genai
 import gradio as gr
 import requests
+from moviepy.editor import ImageClip, AudioFileClip
 # Configure Google Gemini API
 genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
 API_KEY = os.getenv('PLAY_API_KEY')
 USER_ID = os.getenv('PLAY_USER_ID')
+# theme selection
 theme = gr.themes.Base(
     primary_hue="emerald",
 )
 # Function to upload image to Gemini and get roasted text
 def upload_to_gemini(path, mime_type="image/jpeg"):
     file = genai.upload_file(path, mime_type=mime_type)
     else:
         return f"Error: {response.status_code} - {response.text}"
+# Function to create video from image and audio
+def generate_video(image_path, audio_path, fps=24):
+    """Render a video showing a still image for the length of an audio track.
+
+    Args:
+        image_path: Path of the image used as the only video frame.
+        audio_path: Path of the audio file used as the soundtrack; its
+            duration determines the duration of the video.
+        fps: Frame rate written to the output file. An ImageClip carries no
+            frame rate of its own, so one must be passed explicitly or
+            write_videofile raises AttributeError.
+
+    Returns:
+        The path of the written MP4 file ("output_video.mp4").
+    """
+    # Open the audio once and reuse it for both the duration and the track;
+    # each AudioFileClip holds an ffmpeg reader that must be closed.
+    audio_clip = AudioFileClip(audio_path)
+    image_clip = None
+    try:
+        image_clip = ImageClip(image_path).set_duration(audio_clip.duration)
+        video_clip = image_clip.set_audio(audio_clip)
+        video_output_path = "output_video.mp4"
+        video_clip.write_videofile(
+            video_output_path,
+            fps=fps,
+            codec="libx264",
+            audio_codec="aac",
+        )
+        return video_output_path
+    finally:
+        # Release reader resources even when rendering fails.
+        if image_clip is not None:
+            image_clip.close()
+        audio_clip.close()
 # Gradio Interface
+with gr.Blocks(theme=theme) as demo:
     gr.Markdown("# Image to Text-to-Speech Roasting App")
+    gr.Markdown("Upload an image, and the AI will roast it, convert the roast to audio, and create a video output.")
     with gr.Row():
         with gr.Column():
         with gr.Column():
             output_text = gr.Textbox(label="Roast Text")
             audio_output = gr.Audio(label="Roast Audio")
+            video_output = gr.Video(label="Roast Video")
     def process_image(image):
         roast_text = generate_roast(image)
         audio_path = text_to_speech(roast_text)
+        video_path = generate_video(image, audio_path)
+        return roast_text, audio_path, video_path
     submit_button = gr.Button("Generate Roast")
+    submit_button.click(process_image, inputs=image_input, outputs=[output_text, audio_output, video_output])
 # Launch the app
+demo.launch(debug=True)