Spaces:
PlayHT
/
Running on CPU Upgrade

1littlecoder committed on
Commit
44b7d9c
1 Parent(s): 4ba619b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +76 -70
app.py CHANGED
@@ -11,7 +11,7 @@ genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
11
  API_KEY = os.getenv('PLAY_API_KEY')
12
  USER_ID = os.getenv('PLAY_USER_ID')
13
 
14
- # theme selection
15
  theme = gr.themes.Base(
16
  primary_hue="emerald",
17
  )
@@ -22,63 +22,70 @@ def upload_to_gemini(path, mime_type="image/jpeg"):
22
  return file
23
 
24
  def generate_roast(image_path):
25
- # Upload the image to Gemini and get the text
26
- uploaded_file = upload_to_gemini(image_path)
27
- generation_config = {
28
- "temperature": 1,
29
- "top_p": 0.95,
30
- "top_k": 40,
31
- "max_output_tokens": 8192,
32
- "response_mime_type": "text/plain",
33
- }
34
- model = genai.GenerativeModel(
35
- model_name="gemini-1.5-flash-002",
36
- generation_config=generation_config,
37
- system_instruction="You are a professional satirist and fashion expert. You will be given a profile picture. Your duty is to roast in less than 50 words whatever is given to you in the funniest way possible!",
38
- )
39
-
40
- chat_session = model.start_chat(
41
- history=[{"role": "user", "parts": [uploaded_file]}]
42
- )
43
- response = chat_session.send_message("Roast this image!")
44
- return response.text
 
 
45
 
46
  # Function to convert text to speech with Play.ht
47
  def text_to_speech(text):
48
- url = "https://api.play.ht/api/v2/tts/stream"
49
- payload = {
50
- "voice": "s3://voice-cloning-zero-shot/d9ff78ba-d016-47f6-b0ef-dd630f59414e/female-cs/manifest.json",
51
- "output_format": "mp3",
52
- "text": text,
53
- }
54
- headers = {
55
- "accept": "audio/mpeg",
56
- "content-type": "application/json",
57
- "Authorization": API_KEY,
58
- "X-User-ID": USER_ID
59
- }
60
-
61
- response = requests.post(url, json=payload, headers=headers)
62
- if response.status_code == 200:
63
- audio_path = "output_audio.mp3"
64
- with open(audio_path, "wb") as audio_file:
65
- audio_file.write(response.content)
66
- return audio_path
67
- else:
68
- print(f"Error: {response.status_code} - {response.text}")
69
- return None # Return None if there's an error
 
 
70
 
71
  # Function to create video from image and audio
72
  def generate_video(image_path, audio_path):
73
- if audio_path is None:
74
- return None # Skip video generation if there's no valid audio file
75
-
76
- image_clip = ImageClip(image_path).set_duration(AudioFileClip(audio_path).duration)
77
- audio_clip = AudioFileClip(audio_path)
78
- video_clip = image_clip.set_audio(audio_clip)
79
- video_output_path = "output_video.mp4"
80
- video_clip.write_videofile(video_output_path, codec="libx264", audio_codec="aac")
81
- return video_output_path
 
 
 
82
 
83
  # Gradio Interface
84
  with gr.Blocks(theme=theme) as demo:
@@ -86,25 +93,24 @@ with gr.Blocks(theme=theme) as demo:
86
  gr.Markdown("Upload an image, and the AI will roast it, convert the roast to audio, and create a video output.")
87
 
88
  with gr.Row():
89
- with gr.Column():
90
- image_input = gr.Image(type="filepath", label="Upload Image")
91
- with gr.Column():
92
- output_text = gr.Textbox(label="Roast Text")
93
- audio_output = gr.Audio(label="Roast Audio")
94
- video_output = gr.Video(label="Roast Video")
95
-
96
- def process_image(image):
97
- roast_text = generate_roast(image)
98
- audio_path = text_to_speech(roast_text)
99
-
100
- if audio_path is None:
101
- return "Error generating audio. Please try again.", None, None
102
 
103
- video_path = generate_video(image, audio_path)
104
- return roast_text, audio_path, video_path
105
-
106
- submit_button = gr.Button("Generate Roast")
107
- submit_button.click(process_image, inputs=image_input, outputs=[output_text, audio_output, video_output])
 
 
 
 
 
 
 
 
 
 
 
108
 
109
  # Launch the app
110
  demo.launch(debug=True)
 
11
  API_KEY = os.getenv('PLAY_API_KEY')
12
  USER_ID = os.getenv('PLAY_USER_ID')
13
 
14
+ # Theme selection
15
  theme = gr.themes.Base(
16
  primary_hue="emerald",
17
  )
 
22
  return file
23
 
24
  def generate_roast(image_path):
25
+ try:
26
+ uploaded_file = upload_to_gemini(image_path)
27
+ generation_config = {
28
+ "temperature": 1,
29
+ "top_p": 0.95,
30
+ "top_k": 40,
31
+ "max_output_tokens": 8192,
32
+ "response_mime_type": "text/plain",
33
+ }
34
+ model = genai.GenerativeModel(
35
+ model_name="gemini-1.5-flash-002",
36
+ generation_config=generation_config,
37
+ system_instruction="You are a professional satirist and fashion expert. You will be given a profile picture. Your duty is to roast in less than 50 words whatever is given to you in the funniest way possible!",
38
+ )
39
+
40
+ chat_session = model.start_chat(
41
+ history=[{"role": "user", "parts": [uploaded_file]}]
42
+ )
43
+ response = chat_session.send_message("Roast this image!")
44
+ return response.text
45
+ except Exception as e:
46
+ return f"Error generating roast: {e}"
47
 
48
  # Function to convert text to speech with Play.ht
49
  def text_to_speech(text):
50
+ try:
51
+ url = "https://api.play.ht/api/v2/tts/stream"
52
+ payload = {
53
+ "voice": "s3://voice-cloning-zero-shot/d9ff78ba-d016-47f6-b0ef-dd630f59414e/female-cs/manifest.json",
54
+ "output_format": "mp3",
55
+ "text": text,
56
+ }
57
+ headers = {
58
+ "accept": "audio/mpeg",
59
+ "content-type": "application/json",
60
+ "Authorization": API_KEY,
61
+ "X-User-ID": USER_ID
62
+ }
63
+
64
+ response = requests.post(url, json=payload, headers=headers)
65
+ if response.status_code == 200:
66
+ audio_path = "output_audio.mp3"
67
+ with open(audio_path, "wb") as audio_file:
68
+ audio_file.write(response.content)
69
+ return audio_path
70
+ else:
71
+ return f"Error generating audio: {response.status_code} - {response.text}"
72
+ except Exception as e:
73
+ return f"Error generating audio: {e}"
74
 
75
  # Function to create video from image and audio
76
  def generate_video(image_path, audio_path):
77
+ try:
78
+ if audio_path is None or "Error" in audio_path:
79
+ return "Error generating video: No valid audio file."
80
+
81
+ image_clip = ImageClip(image_path).set_duration(AudioFileClip(audio_path).duration)
82
+ audio_clip = AudioFileClip(audio_path)
83
+ video_clip = image_clip.set_audio(audio_clip)
84
+ video_output_path = "output_video.mp4"
85
+ video_clip.write_videofile(video_output_path, codec="libx264", audio_codec="aac")
86
+ return video_output_path
87
+ except Exception as e:
88
+ return f"Error generating video: {e}"
89
 
90
  # Gradio Interface
91
  with gr.Blocks(theme=theme) as demo:
 
93
  gr.Markdown("Upload an image, and the AI will roast it, convert the roast to audio, and create a video output.")
94
 
95
  with gr.Row():
96
+ image_input = gr.Image(type="filepath", label="Upload Image")
 
 
 
 
 
 
 
 
 
 
 
 
97
 
98
+ # Output areas
99
+ output_text = gr.Textbox(label="Roast Text")
100
+ audio_output = gr.Audio(label="Roast Audio")
101
+ video_output = gr.Video(label="Roast Video")
102
+
103
+ # Button to generate roast text
104
+ roast_button = gr.Button("Generate Roast Text")
105
+ roast_button.click(generate_roast, inputs=image_input, outputs=output_text)
106
+
107
+ # Button to generate audio from roast text
108
+ audio_button = gr.Button("Generate Roast Audio")
109
+ audio_button.click(text_to_speech, inputs=output_text, outputs=audio_output)
110
+
111
+ # Button to generate video from image and audio
112
+ video_button = gr.Button("Generate Roast Video")
113
+ video_button.click(generate_video, inputs=[image_input, audio_output], outputs=video_output)
114
 
115
  # Launch the app: start serving the `demo` Blocks UI with debug mode enabled.
116
  demo.launch(debug=True)