1littlecoder committed on
Commit
69617e8
1 Parent(s): 5f4d187

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -19
app.py CHANGED
@@ -1,20 +1,49 @@
 
 
1
  import gradio as gr
2
  import requests
3
- import json
4
- import os
5
 
6
- # Replace with your actual Play.ht API key and User ID
 
 
 
7
  API_KEY = os.getenv('PLAY_API_KEY')
8
  USER_ID = os.getenv('PLAY_USER_ID')
9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  def text_to_speech(text):
11
  url = "https://api.play.ht/api/v2/tts/stream"
12
-
13
- # Customize the payload based on your Play.ht account setup
14
  payload = {
15
- "voice": "s3://voice-cloning-zero-shot/d9ff78ba-d016-47f6-b0ef-dd630f59414e/female-cs/manifest.json", # Replace with your desired voice
16
  "output_format": "mp3",
17
- "text": text # Text to be converted to speech
18
  }
19
  headers = {
20
  "accept": "audio/mpeg",
@@ -24,10 +53,7 @@ def text_to_speech(text):
24
  }
25
 
26
  response = requests.post(url, json=payload, headers=headers)
27
-
28
- # Check if the response was successful
29
  if response.status_code == 200:
30
- # Save the audio content to a file
31
  audio_path = "output_audio.mp3"
32
  with open(audio_path, "wb") as audio_file:
33
  audio_file.write(response.content)
@@ -35,13 +61,25 @@ def text_to_speech(text):
35
  else:
36
  return f"Error: {response.status_code} - {response.text}"
37
 
38
- # Set up Gradio Interface
39
- iface = gr.Interface(
40
- fn=text_to_speech,
41
- inputs="text",
42
- outputs="audio",
43
- title="Play.ht Text-to-Speech",
44
- description="Convert text into speech using Play.ht's TTS streaming API."
45
- )
 
 
 
 
 
 
 
 
 
 
 
46
 
47
- iface.launch(debug = True)
 
 
1
+ import os
2
+ import google.generativeai as genai
3
  import gradio as gr
4
  import requests
 
 
5
 
6
+ # Configure Google Gemini API
7
+ genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
8
+
9
+ # Play.ht API keys
10
  API_KEY = os.getenv('PLAY_API_KEY')
11
  USER_ID = os.getenv('PLAY_USER_ID')
12
 
13
+ # Function to upload a file to Gemini and return the upload result
14
def upload_to_gemini(path, mime_type="image/jpeg"):
    """Upload the file at ``path`` via ``genai.upload_file``.

    Args:
        path: Location of the file to upload.
        mime_type: MIME type forwarded to the upload call. Defaults to
            ``"image/jpeg"``.

    Returns:
        The object returned by ``genai.upload_file`` for this upload.
    """
    return genai.upload_file(path, mime_type=mime_type)
17
+
18
def generate_roast(image_path):
    """Ask Gemini to roast the image stored at ``image_path``.

    The image is uploaded, placed in the chat history as the opening user
    turn, and the model is then prompted with "Roast this image!".

    Args:
        image_path: Filesystem path of the image to roast.

    Returns:
        The text of the model's reply.
    """
    # Imported locally so this function does not rely on a new file-level import.
    import mimetypes

    # Label the upload with the file's actual type instead of always claiming
    # JPEG; fall back to the previous default when the extension is unknown
    # or does not map to an image type.
    guessed_type, _ = mimetypes.guess_type(image_path)
    if guessed_type and guessed_type.startswith("image/"):
        mime_type = guessed_type
    else:
        mime_type = "image/jpeg"
    uploaded_file = upload_to_gemini(image_path, mime_type=mime_type)

    generation_config = {
        "temperature": 1,
        "top_p": 0.95,
        "top_k": 40,
        "max_output_tokens": 8192,
        "response_mime_type": "text/plain",
    }
    model = genai.GenerativeModel(
        model_name="gemini-1.5-flash-002",
        generation_config=generation_config,
        system_instruction="You are a professional satirist and fashion expert. You will be given a profile picture. Your duty is to roast whatever is given to you in the funniest way possible!",
    )

    # Seed the conversation with the uploaded image as the first user turn,
    # then send the actual roast request.
    chat_session = model.start_chat(
        history=[{"role": "user", "parts": [uploaded_file]}]
    )
    response = chat_session.send_message("Roast this image!")
    return response.text
39
+
40
+ # Function to convert text to speech with Play.ht
41
  def text_to_speech(text):
42
  url = "https://api.play.ht/api/v2/tts/stream"
 
 
43
  payload = {
44
+ "voice": "s3://voice-cloning-zero-shot/d9ff78ba-d016-47f6-b0ef-dd630f59414e/female-cs/manifest.json",
45
  "output_format": "mp3",
46
+ "text": text,
47
  }
48
  headers = {
49
  "accept": "audio/mpeg",
 
53
  }
54
 
55
  response = requests.post(url, json=payload, headers=headers)
 
 
56
  if response.status_code == 200:
 
57
  audio_path = "output_audio.mp3"
58
  with open(audio_path, "wb") as audio_file:
59
  audio_file.write(response.content)
 
61
  else:
62
  return f"Error: {response.status_code} - {response.text}"
63
 
64
+ # Gradio Interface
65
# NOTE(review): Gradio documents `theme` as a Theme object or a theme-name
# string; confirm that this dict actually applies the intended hue.
with gr.Blocks(theme={"primary_hue": "#b4fd83"}) as demo:
    gr.Markdown("# Image to Text-to-Speech Roasting App")
    gr.Markdown("Upload an image, and the AI will roast it and convert the roast to audio.")

    with gr.Row():
        with gr.Column():
            image_input = gr.Image(type="filepath", label="Upload Image")
        with gr.Column():
            output_text = gr.Textbox(label="Roast Text")
            audio_output = gr.Audio(label="Roast Audio")

    def process_image(image):
        """Roast the uploaded image and turn the roast into speech.

        Args:
            image: File path supplied by the image component, or ``None``
                when nothing has been uploaded.

        Returns:
            A ``(roast_text, audio)`` pair for the text box and audio player.
            ``audio`` is ``None`` when speech synthesis reported an error.

        Raises:
            gr.Error: If no image was provided.
        """
        # Fail early with a readable message instead of sending an empty
        # path to the Gemini upload.
        if not image:
            raise gr.Error("Please upload an image before generating a roast.")

        roast_text = generate_roast(image)
        audio_result = text_to_speech(roast_text)

        # text_to_speech reports failures as an "Error: ..." string; that is
        # not a playable file, so surface it as a warning and keep the text.
        if isinstance(audio_result, str) and audio_result.startswith("Error:"):
            gr.Warning(audio_result)
            return roast_text, None
        return roast_text, audio_result

    submit_button = gr.Button("Generate Roast")
    submit_button.click(process_image, inputs=image_input, outputs=[output_text, audio_output])

# Launch the app
demo.launch(debug=True)