kivilaid committed
Commit c3e0f1a
1 Parent(s): a5f1a61

Create App.py

Files changed (1)
  1. App.py +63 -0
App.py ADDED
@@ -0,0 +1,63 @@
import streamlit as st
from gpt4v import GPT4Vision
from gpttts import GPTTTS
from openai import OpenAI
import os

# Initialize GPT4Vision and the OpenAI client
image = GPT4Vision()
client = OpenAI(api_key=os.environ['OPENAI_API_KEY'])
talk = GPTTTS(client)

st.set_page_config(page_title="Miracle", page_icon="🤖", layout="wide")


def main():

    col1, col2, col3 = st.columns(spec=[1, 1, 1], gap="small")

    with col1:
        # st.image("https://static.streamlit.io/examples/cat.jpg")
        language = st.selectbox("Select language", options=["Afrikaans", "Arabic", "Armenian", "Azerbaijani", "Belarusian", "Bosnian", "Bulgarian", "Catalan", "Chinese", "Croatian", "Czech", "Danish", "Dutch", "English", "Estonian", "Finnish", "French", "Galician", "German", "Greek", "Hebrew", "Hindi", "Hungarian", "Icelandic", "Indonesian", "Italian", "Japanese", "Kannada", "Kazakh", "Korean", "Latvian", "Lithuanian", "Macedonian", "Malay", "Marathi", "Maori", "Nepali", "Norwegian", "Persian", "Polish", "Portuguese", "Romanian", "Russian", "Serbian", "Slovak", "Slovenian", "Spanish", "Swahili", "Swedish", "Tagalog", "Tamil", "Thai", "Turkish", "Ukrainian", "Urdu", "Vietnamese", "Welsh"], index=13)

        # 1. Let the user upload an image or video, or use the camera to capture an image
        uploaded_file = st.file_uploader("Upload image or video", label_visibility="hidden")

    with col3:
        # st.image("https://static.streamlit.io/examples/dog.jpg")
        camera_image = st.camera_input("Or take a picture", label_visibility="hidden")

    # Use the uploaded file or the camera image, whichever is available
    file_to_process = uploaded_file if uploaded_file is not None else camera_image

    if file_to_process is not None:
        with col2:
            # Display the uploaded or captured image
            st.image(file_to_process, caption="Image")

            # 2. Send the file to GPT4Vision for explanation
            with st.spinner("Generating explanation..."):
                explanation = image.describe(image_file=file_to_process, user_message="Describe this image in language: " + language)
            # 3. Display the explanation
            st.success(explanation)
            # st.write(explanation)

            # 4. Send the response to GPTTTS to be spoken
            with st.spinner("Generating audio..."):
                audio_file_path = talk.generate_speech(text=explanation, model="tts-1-hd", voice="alloy")

            # 5. Play the audio automatically - read the audio file and pass its content
            with open(audio_file_path, "rb") as audio_file:
                audio_data = audio_file.read()
            st.audio(audio_data, format='audio/mp3', start_time=0)

            # Optionally, delete the audio file after playing
            # os.remove(audio_file_path)


if __name__ == "__main__":
    main()
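
Note: the gpt4v and gpttts helper modules imported above are not part of this commit. App.py only assumes that GPT4Vision.describe() returns a text description of an uploaded image and that GPTTTS.generate_speech() returns a path to an MP3 file. The sketch below shows one possible implementation of those interfaces against the OpenAI Python SDK; the class and method bodies here are assumptions for illustration, not the author's actual modules.

# Hypothetical sketch of gpt4v.py / gpttts.py -- assumed interfaces only.
import base64
import os
import tempfile
from openai import OpenAI


class GPT4Vision:
    """Assumed wrapper: sends an uploaded image to a GPT-4 vision model and returns its text answer."""

    def __init__(self, model="gpt-4-vision-preview"):
        self.client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
        self.model = model

    def describe(self, image_file, user_message):
        # image_file is a Streamlit UploadedFile (file_uploader or camera_input);
        # read its bytes and base64-encode them for the vision request.
        image_b64 = base64.b64encode(image_file.getvalue()).decode("utf-8")
        response = self.client.chat.completions.create(
            model=self.model,
            messages=[{
                "role": "user",
                "content": [
                    {"type": "text", "text": user_message},
                    {"type": "image_url",
                     "image_url": {"url": f"data:image/jpeg;base64,{image_b64}"}},
                ],
            }],
            max_tokens=500,
        )
        return response.choices[0].message.content


class GPTTTS:
    """Assumed wrapper: converts text to speech with the OpenAI TTS endpoint and returns the MP3 path."""

    def __init__(self, client):
        self.client = client

    def generate_speech(self, text, model="tts-1-hd", voice="alloy"):
        speech = self.client.audio.speech.create(model=model, voice=voice, input=text)
        # Write the binary MP3 response to a temp file and return its path,
        # which App.py then opens and plays with st.audio.
        audio_path = os.path.join(tempfile.gettempdir(), "speech.mp3")
        speech.stream_to_file(audio_path)
        return audio_path

With modules like these in place, the app runs with streamlit run App.py and an OPENAI_API_KEY variable set in the environment, as expected by the OpenAI client initialization in App.py.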