1littlecoder committed on
Commit
d652179
1 Parent(s): dec1a60

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -8
app.py CHANGED
@@ -2,6 +2,7 @@ import os
2
  import google.generativeai as genai
3
  import gradio as gr
4
  import requests
 
5
 
6
  # Configure Google Gemini API
7
  genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
@@ -10,13 +11,11 @@ genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
10
  API_KEY = os.getenv('PLAY_API_KEY')
11
  USER_ID = os.getenv('PLAY_USER_ID')
12
 
13
- # theme selection let's go with this before the branded color
14
- #theme={"primary_hue": "#b4fd83"}
15
  theme = gr.themes.Base(
16
  primary_hue="emerald",
17
  )
18
 
19
-
20
  # Function to upload image to Gemini and get roasted text
21
  def upload_to_gemini(path, mime_type="image/jpeg"):
22
  file = genai.upload_file(path, mime_type=mime_type)
@@ -68,10 +67,19 @@ def text_to_speech(text):
68
  else:
69
  return f"Error: {response.status_code} - {response.text}"
70
 
 
 
 
 
 
 
 
 
 
71
  # Gradio Interface
72
- with gr.Blocks(theme = theme) as demo:
73
  gr.Markdown("# Image to Text-to-Speech Roasting App")
74
- gr.Markdown("Upload an image, and the AI will roast it and convert the roast to audio.")
75
 
76
  with gr.Row():
77
  with gr.Column():
@@ -79,14 +87,16 @@ with gr.Blocks(theme = theme) as demo:
79
  with gr.Column():
80
  output_text = gr.Textbox(label="Roast Text")
81
  audio_output = gr.Audio(label="Roast Audio")
 
82
 
83
  def process_image(image):
84
  roast_text = generate_roast(image)
85
  audio_path = text_to_speech(roast_text)
86
- return roast_text, audio_path
 
87
 
88
  submit_button = gr.Button("Generate Roast")
89
- submit_button.click(process_image, inputs=image_input, outputs=[output_text, audio_output])
90
 
91
  # Launch the app
92
- demo.launch(debug=True)
 
2
  import google.generativeai as genai
3
  import gradio as gr
4
  import requests
5
+ from moviepy.editor import ImageClip, AudioFileClip
6
 
7
  # Configure Google Gemini API
8
  genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
 
11
  API_KEY = os.getenv('PLAY_API_KEY')
12
  USER_ID = os.getenv('PLAY_USER_ID')
13
 
14
+ # theme selection
 
15
  theme = gr.themes.Base(
16
  primary_hue="emerald",
17
  )
18
 
 
19
  # Function to upload image to Gemini and get roasted text
20
  def upload_to_gemini(path, mime_type="image/jpeg"):
21
  file = genai.upload_file(path, mime_type=mime_type)
 
67
  else:
68
  return f"Error: {response.status_code} - {response.text}"
69
 
70
+ # Function to create video from image and audio
71
+ def generate_video(image_path, audio_path):
72
+ image_clip = ImageClip(image_path).set_duration(AudioFileClip(audio_path).duration)
73
+ audio_clip = AudioFileClip(audio_path)
74
+ video_clip = image_clip.set_audio(audio_clip)
75
+ video_output_path = "output_video.mp4"
76
+ video_clip.write_videofile(video_output_path, codec="libx264", audio_codec="aac")
77
+ return video_output_path
78
+
79
  # Gradio Interface
80
+ with gr.Blocks(theme=theme) as demo:
81
  gr.Markdown("# Image to Text-to-Speech Roasting App")
82
+ gr.Markdown("Upload an image, and the AI will roast it, convert the roast to audio, and create a video output.")
83
 
84
  with gr.Row():
85
  with gr.Column():
 
87
  with gr.Column():
88
  output_text = gr.Textbox(label="Roast Text")
89
  audio_output = gr.Audio(label="Roast Audio")
90
+ video_output = gr.Video(label="Roast Video")
91
 
92
  def process_image(image):
93
  roast_text = generate_roast(image)
94
  audio_path = text_to_speech(roast_text)
95
+ video_path = generate_video(image, audio_path)
96
+ return roast_text, audio_path, video_path
97
 
98
  submit_button = gr.Button("Generate Roast")
99
+ submit_button.click(process_image, inputs=image_input, outputs=[output_text, audio_output, video_output])
100
 
101
  # Launch the app
102
+ demo.launch(debug=True)