Create App.py
App.py
ADDED
import os

import streamlit as st
from openai import OpenAI

from gpt4v import GPT4Vision
from gpttts import GPTTTS

# Initialize the GPT-4 Vision wrapper, the OpenAI client, and the TTS wrapper
image = GPT4Vision()
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
talk = GPTTTS(client)

st.set_page_config(page_title="Miracle", page_icon="🤖", layout="wide")


def main():
    col1, col2, col3 = st.columns(spec=[1, 1, 1], gap="small")

    with col1:
        # st.image("https://static.streamlit.io/examples/cat.jpg")
        language = st.selectbox(
            "Select language",
            options=[
                "Afrikaans", "Arabic", "Armenian", "Azerbaijani", "Belarusian",
                "Bosnian", "Bulgarian", "Catalan", "Chinese", "Croatian", "Czech",
                "Danish", "Dutch", "English", "Estonian", "Finnish", "French",
                "Galician", "German", "Greek", "Hebrew", "Hindi", "Hungarian",
                "Icelandic", "Indonesian", "Italian", "Japanese", "Kannada",
                "Kazakh", "Korean", "Latvian", "Lithuanian", "Macedonian", "Malay",
                "Marathi", "Maori", "Nepali", "Norwegian", "Persian", "Polish",
                "Portuguese", "Romanian", "Russian", "Serbian", "Slovak",
                "Slovenian", "Spanish", "Swahili", "Swedish", "Tagalog", "Tamil",
                "Thai", "Turkish", "Ukrainian", "Urdu", "Vietnamese", "Welsh",
            ],
            index=13,  # default to English
        )

        # 1. Let the user upload an image or video...
        uploaded_file = st.file_uploader("Upload image or video", label_visibility="hidden")

    with col3:
        # st.image("https://static.streamlit.io/examples/dog.jpg")
        # ...or capture an image with the camera
        camera_image = st.camera_input("Or take a picture", label_visibility="hidden")

    # Use the uploaded file or the camera image, whichever is available
    file_to_process = uploaded_file if uploaded_file is not None else camera_image

    if file_to_process is not None:
        # Display the uploaded or captured image
        with col2:
            st.image(file_to_process, caption="Image")

        # 2. Send the file to GPT4Vision for an explanation
        with st.spinner("Generating explanation..."):
            explanation = image.describe(
                image_file=file_to_process,
                user_message="Describe this image in language: " + language,
            )

        # 3. Display the explanation
        st.success(explanation)

        # 4. Send the explanation to GPTTTS to be spoken
        with st.spinner("Generating audio..."):
            audio_file_path = talk.generate_speech(
                text=explanation, model="tts-1-hd", voice="alloy"
            )

        # 5. Play the audio: read the file and pass its contents to st.audio
        with open(audio_file_path, "rb") as audio_file:
            audio_data = audio_file.read()
        st.audio(audio_data, format="audio/mp3", start_time=0)

        # Optionally, delete the audio file after playing
        # os.remove(audio_file_path)


if __name__ == "__main__":
    main()
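
This commit only adds App.py; the gpt4v module it imports is not shown. Below is a minimal sketch of what gpt4v.py might look like: the class name and the describe(image_file=..., user_message=...) signature come from the usage above, and everything else (the model name, the base64 data-URL upload, the API-key handling) is an assumption based on OpenAI's standard vision chat API.

# gpt4v.py -- hypothetical sketch, not the file from this commit
import base64
import os

from openai import OpenAI


class GPT4Vision:
    """Assumed wrapper around OpenAI's vision-capable chat endpoint."""

    def __init__(self):
        # Assumption: reads the same OPENAI_API_KEY variable as App.py.
        self.client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

    def describe(self, image_file, user_message):
        # image_file is the Streamlit UploadedFile from App.py; embed its
        # bytes as a base64 data URL, which the chat API accepts for images.
        b64 = base64.b64encode(image_file.getvalue()).decode("utf-8")
        response = self.client.chat.completions.create(
            model="gpt-4-vision-preview",  # assumption: any vision-capable model works
            max_tokens=500,
            messages=[{
                "role": "user",
                "content": [
                    {"type": "text", "text": user_message},
                    {"type": "image_url",
                     "image_url": {"url": f"data:image/jpeg;base64,{b64}"}},
                ],
            }],
        )
        return response.choices[0].message.content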
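
The gpttts module is likewise absent from the commit. A sketch consistent with the call site above — GPTTTS(client) plus generate_speech(text=..., model=..., voice=...) returning a playable file path — could wrap OpenAI's text-to-speech endpoint like this; the temporary-file handling is an assumption:

# gpttts.py -- hypothetical sketch, not the file from this commit
import tempfile


class GPTTTS:
    """Assumed wrapper around OpenAI's text-to-speech endpoint."""

    def __init__(self, client):
        # App.py passes in an already-configured OpenAI client.
        self.client = client

    def generate_speech(self, text, model="tts-1", voice="alloy"):
        # Synthesize speech, write the MP3 bytes to a temporary file, and
        # return its path; App.py opens the path and feeds it to st.audio.
        response = self.client.audio.speech.create(model=model, voice=voice, input=text)
        tmp = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False)
        tmp.write(response.content)
        tmp.close()
        return tmp.name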
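
Assuming both modules are present alongside App.py, the app follows the standard Streamlit workflow: set OPENAI_API_KEY in the environment and launch with "streamlit run App.py".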