1littlecoder committed on
Commit
f16d803
1 Parent(s): 4d0dc05

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -33
app.py CHANGED
@@ -1,7 +1,15 @@
1
  import os
 
 
2
  import google.generativeai as genai
3
  import gradio as gr
4
  import requests
 
 
 
 
 
 
5
 
6
  # Configure Google Gemini API
7
  genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
@@ -10,20 +18,11 @@ genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
10
  API_KEY = os.getenv('PLAY_API_KEY')
11
  USER_ID = os.getenv('PLAY_USER_ID')
12
 
13
- # theme selection let's go with this before the branded color
14
- #theme={"primary_hue": "#b4fd83"}
15
- theme = gr.themes.Base(
16
- primary_hue="emerald",
17
- )
18
-
19
-
20
- # Function to upload image to Gemini and get roasted text
21
  def upload_to_gemini(path, mime_type="image/jpeg"):
22
  file = genai.upload_file(path, mime_type=mime_type)
23
  return file
24
 
25
  def generate_roast(image_path):
26
- # Upload the image to Gemini and get the text
27
  uploaded_file = upload_to_gemini(image_path)
28
  generation_config = {
29
  "temperature": 1,
@@ -35,16 +34,12 @@ def generate_roast(image_path):
35
  model = genai.GenerativeModel(
36
  model_name="gemini-1.5-flash-002",
37
  generation_config=generation_config,
38
- system_instruction="You are a professional satirist and fashion expert. You will be given a profile picture. Your duty is to roast whatever is given to you in the funniest way possible!",
39
- )
40
-
41
- chat_session = model.start_chat(
42
- history=[{"role": "user", "parts": [uploaded_file]}]
43
  )
 
44
  response = chat_session.send_message("Roast this image!")
45
  return response.text
46
 
47
- # Function to convert text to speech with Play.ht
48
  def text_to_speech(text):
49
  url = "https://api.play.ht/api/v2/tts/stream"
50
  payload = {
@@ -58,7 +53,6 @@ def text_to_speech(text):
58
  "Authorization": API_KEY,
59
  "X-User-ID": USER_ID
60
  }
61
-
62
  response = requests.post(url, json=payload, headers=headers)
63
  if response.status_code == 200:
64
  audio_path = "output_audio.mp3"
@@ -66,27 +60,31 @@ def text_to_speech(text):
66
  audio_file.write(response.content)
67
  return audio_path
68
  else:
69
- return f"Error: {response.status_code} - {response.text}"
 
 
 
 
 
70
 
71
- # Gradio Interface
72
- with gr.Blocks(theme = theme) as demo:
73
- gr.Markdown("# Image to Text-to-Speech Roasting App")
74
- gr.Markdown("Upload an image, and the AI will roast it and convert the roast to audio.")
 
 
 
 
 
 
75
 
76
  with gr.Row():
77
- with gr.Column():
78
- image_input = gr.Image(type="filepath", label="Upload Image")
79
- with gr.Column():
80
- output_text = gr.Textbox(label="Roast Text")
81
- audio_output = gr.Audio(label="Roast Audio")
82
-
83
- def process_image(image):
84
- roast_text = generate_roast(image)
85
- audio_path = text_to_speech(roast_text)
86
- return roast_text, audio_path
87
 
88
- submit_button = gr.Button("Generate Roast")
89
- submit_button.click(process_image, inputs=image_input, outputs=[output_text, audio_output])
90
 
91
  # Launch the app
92
  demo.launch(debug=True)
 
1
  import os
2
+ import tempfile
3
+ import shutil
4
  import google.generativeai as genai
5
  import gradio as gr
6
  import requests
7
+ import numpy as np
8
+ import subprocess
9
+ import matplotlib.pyplot as plt
10
+ from matplotlib.animation import FuncAnimation
11
+ import PIL.Image
12
+ from gradio import processing_utils, utils
13
 
14
  # Configure Google Gemini API
15
  genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
 
18
  API_KEY = os.getenv('PLAY_API_KEY')
19
  USER_ID = os.getenv('PLAY_USER_ID')
20
 
 
 
 
 
 
 
 
 
21
  def upload_to_gemini(path, mime_type="image/jpeg"):
22
  file = genai.upload_file(path, mime_type=mime_type)
23
  return file
24
 
25
  def generate_roast(image_path):
 
26
  uploaded_file = upload_to_gemini(image_path)
27
  generation_config = {
28
  "temperature": 1,
 
34
  model = genai.GenerativeModel(
35
  model_name="gemini-1.5-flash-002",
36
  generation_config=generation_config,
37
+ system_instruction="You are a professional satirist and fashion expert. Roast the profile picture.",
 
 
 
 
38
  )
39
+ chat_session = model.start_chat(history=[{"role": "user", "parts": [uploaded_file]}])
40
  response = chat_session.send_message("Roast this image!")
41
  return response.text
42
 
 
43
  def text_to_speech(text):
44
  url = "https://api.play.ht/api/v2/tts/stream"
45
  payload = {
 
53
  "Authorization": API_KEY,
54
  "X-User-ID": USER_ID
55
  }
 
56
  response = requests.post(url, json=payload, headers=headers)
57
  if response.status_code == 200:
58
  audio_path = "output_audio.mp3"
 
60
  audio_file.write(response.content)
61
  return audio_path
62
  else:
63
+ raise ValueError(f"Error: {response.status_code} - {response.text}")
64
+
65
+ # Generate waveform and overlay with image
66
def make_waveform_overlay(audio_path, image_path):
    """Render an animated waveform video of the audio on top of the image.

    Args:
        audio_path: Path of the audio file to visualise.
        image_path: Path of the image used as the video background.

    Returns:
        The value returned by ``gr.make_waveform`` for the generated video.
    """
    # The visible part of this file never defines or imports a bare
    # ``make_waveform`` name, so the unqualified call would raise NameError.
    # Call the helper through the gradio package, which is imported as ``gr``.
    # NOTE(review): this helper is only present in some Gradio releases -
    # confirm against the pinned version.
    return gr.make_waveform(audio_path, bg_image=image_path, animate=True)
69
 
70
+ # Full Gradio Functionality
71
def process_image(image):
    """Roast an uploaded image and turn the roast into a waveform video.

    Runs the pipeline: image -> roast text -> speech audio -> waveform
    video rendered over the original image.

    Args:
        image: Path of the uploaded image. It is wired to a
            ``gr.Image(type="filepath")`` input, so it is expected to be
            empty/``None`` when the user has not uploaded anything.

    Returns:
        A ``(roast_text, final_video_path)`` tuple, matching the two output
        components bound to the submit button.

    Raises:
        ValueError: If no image was provided.
    """
    # Fail fast with a clear message instead of letting a missing upload
    # travel into the Gemini upload call and surface as an obscure error.
    if not image:
        raise ValueError("Error: no image provided - please upload an image first.")

    roast_text = generate_roast(image)
    audio_path = text_to_speech(roast_text)
    final_video_path = make_waveform_overlay(audio_path, image)
    return roast_text, final_video_path
76
+
77
+ # Gradio Blocks UI
78
with gr.Blocks() as demo:
    # Page heading.
    gr.Markdown("# Image Roast and Waveform Video Generator")

    # NOTE(review): the source view lost its indentation; the three
    # components are assumed to sit inside the row - confirm.
    with gr.Row():
        image_input = gr.Image(type="filepath", label="Upload Image")
        output_text = gr.Textbox(label="Roast Text")
        output_video = gr.Video(label="Roast Waveform Video")

    # Clicking the button runs the pipeline and fills both outputs.
    submit_button = gr.Button("Generate Roast Video")
    submit_button.click(
        fn=process_image,
        inputs=image_input,
        outputs=[output_text, output_video],
    )

# Launch the app
demo.launch(debug=True)