ns-devel committed
Commit 8677234 • Parent(s): 482033a
Added OpenAI and Gemini for video QnA.
Changed files:
- __pycache__/settings.cpython-311.pyc +0 -0
- app.py +45 -0
- lib/__init__.py +0 -0
- lib/__pycache__/__init__.cpython-311.pyc +0 -0
- lib/services/__pycache__/gemini.cpython-311.pyc +0 -0
- lib/services/__pycache__/hf_model.cpython-311.pyc +0 -0
- lib/services/__pycache__/openai.cpython-311.pyc +0 -0
- lib/services/gemini.py +40 -0
- lib/services/hf_model.py +31 -0
- lib/services/openai.py +35 -0
- requirements.txt +68 -0
- settings.py +4 -0
__pycache__/settings.cpython-311.pyc
ADDED
Binary file (486 Bytes).
app.py
ADDED
@@ -0,0 +1,45 @@
import streamlit as st
from lib.services.hf_model import get_transcript
from lib.services.gemini import gemini
from lib.services.openai import get_completion


@st.cache_resource
def get_cached_transcript(video_url):
    return get_transcript(video_url)


def main():
    st.title("VideoClarify")

    # Get video URL from user
    video_url = st.text_input("Enter Video URL:", key="video_url")
    selected_model = st.sidebar.selectbox("Select Model", ["Gemini", "OpenAI"])
    print(selected_model)
    if video_url:
        st.video(video_url)
        # Get transcript from the video
        transcript = get_cached_transcript(video_url)
        # Provide an input box for user to ask a question
        question = st.text_input(
            label="Ask a question about the video:", key="question")

        if st.button("Get Answer"):
            if question:
                if selected_model == "Gemini":
                    st.info("Using Gemini to answer the question.")
                    # Use Gemini to summarize and answer the question
                    response = gemini(transcript, question)
                if selected_model == "OpenAI":
                    st.info("Using OpenAI to answer the question.")
                    # Use OpenAI to summarize and answer the question
                    response = get_completion(transcript, question)
                # Display the result to the user
                st.subheader("Result:")
                st.write(response)
            else:
                st.info("Please ask a question about the video.")


if __name__ == "__main__":
    main()
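The service modules read GOOGLE_API_KEY and HF_KEY from the environment, and the OpenAI path needs a key as well. A minimal smoke-test sketch (not part of the commit) that exercises both answer paths without the Streamlit UI; "sample.mp4" is a hypothetical local video file:

from lib.services.hf_model import get_transcript
from lib.services.gemini import gemini
from lib.services.openai import get_completion

if __name__ == "__main__":
    # Assumes GOOGLE_API_KEY, HF_KEY (and an OpenAI key) are exported.
    transcript = get_transcript("sample.mp4")  # hypothetical local video
    question = "What is the video about?"
    print(gemini(transcript, question))          # Gemini path
    print(get_completion(transcript, question))  # OpenAI path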
lib/__init__.py
ADDED
Empty file.
lib/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (163 Bytes).
lib/services/__pycache__/gemini.cpython-311.pyc
ADDED
Binary file (1.93 kB).
lib/services/__pycache__/hf_model.cpython-311.pyc
ADDED
Binary file (2.05 kB).
lib/services/__pycache__/openai.cpython-311.pyc
ADDED
Binary file (1.53 kB).
lib/services/gemini.py
ADDED
@@ -0,0 +1,40 @@
import os
from pathlib import Path

import PIL.Image
import google.generativeai as genai

def configure_genai(api_key):
    genai.configure(api_key=api_key)


def generate_content(model, prompt, stream=True):
    response = model.generate_content(prompt, stream=stream)
    response.resolve()
    return response.text


def gemini(transcript, question):
    print(transcript, question)
    configure_genai(os.environ['GOOGLE_API_KEY'])

    # Create GenerativeModel instance
    model = genai.GenerativeModel('gemini-pro')

    # Generate content using the model and image
    prompt = [f"""
    Transcript:
    ```
    {transcript}
    ```
    Provided is a video transcript enclosed within triple backticks. Your task is to respond to questions that are either based on or directly related to the content of the video transcript. If the question does not pertain to or is not in the context of the video transcript, please reply with "Please ask questions related to the video only."

    Note:
    - Do not include `video transcript` in your response, refer it as `video`.

    Question: {question}
    """]
    response_text = generate_content(model, prompt)

    return response_text
    # Optionally display as Markdown
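generate_content requests a streamed response and then immediately resolve()s it into the full text. If incremental output were wanted later, a streamed variant might look like the sketch below (an assumption following the google-generativeai streaming interface, not part of the commit):

def generate_content_streamed(model, prompt):
    # Yield text chunk by chunk instead of resolving the whole response first.
    response = model.generate_content(prompt, stream=True)
    for chunk in response:
        yield chunk.text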
lib/services/hf_model.py
ADDED
@@ -0,0 +1,31 @@
import os
import requests
from settings import HF_API_URL, DATA_DIR
from pathlib import Path
from moviepy.editor import VideoFileClip

def convert_video_to_wav(video_path, output_path):
    """
    Converts a video file to a WAV audio file.

    Args:
        video_path (str): The path of the video file to be converted.
        output_path (str): The desired path for the output WAV audio file.

    Returns:
        None
    """
    video_clip = VideoFileClip(video_path)
    audio_clip = video_clip.audio
    audio_clip.write_audiofile(output_path)

def get_transcript(filepath):
    audio_file = Path(DATA_DIR).joinpath(Path(filepath).stem + ".wav")
    if not audio_file.exists():
        convert_video_to_wav(filepath, audio_file)
    headers = {"Authorization": f"Bearer {os.environ['HF_KEY']}"}
    with open(audio_file, "rb") as f:
        data = f.read()
    response = requests.post(HF_API_URL, headers=headers,
                             data=data)
    return response.json()["text"]
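get_transcript assumes the Inference API returns JSON with a "text" field; a non-200 reply (for example a 503 while the hosted Whisper model is still loading) would surface as a KeyError. A slightly defensive variant, sketched under the same assumptions and not part of the commit:

def get_transcript_checked(filepath):
    # Same flow as get_transcript, but fail loudly on HTTP errors.
    audio_file = Path(DATA_DIR).joinpath(Path(filepath).stem + ".wav")
    if not audio_file.exists():
        convert_video_to_wav(filepath, audio_file)
    headers = {"Authorization": f"Bearer {os.environ['HF_KEY']}"}
    with open(audio_file, "rb") as f:
        response = requests.post(HF_API_URL, headers=headers, data=f.read())
    response.raise_for_status()
    return response.json()["text"]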
lib/services/openai.py
ADDED
@@ -0,0 +1,35 @@
import openai
def get_completion(transcript, question):
    """
    Generate a text completion using OpenAI's GPT-3 model.

    Args:
        prompt (str): The input text prompt for text generation.
        model (str, optional): The GPT-3 model to use. Default is "gpt-3.5-turbo-16k".

    Returns:
        str: The generated text based on the prompt.
    """
    messages = [
        {
            "role": "system",
            "content": f"""
            Transcript:
            ```
            {transcript}
            ```
            Provided is a video transcript enclosed within triple backticks. Your task is to respond to questions that are either based on or directly related to the content of the video transcript. If the question does not pertain to or is not in the context of the video transcript, please reply with "Please ask questions related to the video only."

            Note:
            - Do not include `video transcript` in your response, refer it as `video`.

            Question: {question}
            """
        }
    ]
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo-16k",
        messages=messages,
        temperature=0.2,  # This is the degree of randomness of the model's output
    )
    return response.choices[0].message["content"]
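The module never assigns openai.api_key; with the pinned openai==0.28 client the key is taken from the OPENAI_API_KEY environment variable, so that variable must be exported before the app starts. If an explicit assignment were preferred, a short sketch (an assumption, not part of the commit):

import os
import openai

# Wire the key explicitly instead of relying on the environment fallback.
openai.api_key = os.environ["OPENAI_API_KEY"]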
requirements.txt
ADDED
@@ -0,0 +1,68 @@
aiohttp==3.9.1
aiosignal==1.3.1
altair==5.2.0
attrs==23.2.0
blinker==1.7.0
cachetools==5.3.2
certifi==2023.11.17
charset-normalizer==3.3.2
click==8.1.7
decorator==4.4.2
frozenlist==1.4.1
gitdb==4.0.11
GitPython==3.1.41
google-ai-generativelanguage==0.4.0
google-api-core==2.15.0
google-auth==2.26.2
google-generativeai==0.3.2
googleapis-common-protos==1.62.0
grpcio==1.60.0
grpcio-status==1.60.0
idna==3.6
imageio==2.33.1
imageio-ffmpeg==0.4.9
importlib-metadata==7.0.1
Jinja2==3.1.3
jsonschema==4.21.1
jsonschema-specifications==2023.12.1
markdown-it-py==3.0.0
MarkupSafe==2.1.4
mdurl==0.1.2
moviepy==1.0.3
multidict==6.0.4
numpy==1.26.3
openai==0.28.0
packaging==23.2
pandas==2.2.0
pillow==10.2.0
proglog==0.1.10
proto-plus==1.23.0
protobuf==4.25.2
pyarrow==14.0.2
pyasn1==0.5.1
pyasn1-modules==0.3.0
pydeck==0.8.1b0
Pygments==2.17.2
python-dateutil==2.8.2
pytz==2023.3.post1
referencing==0.32.1
requests==2.31.0
rich==13.7.0
rpds-py==0.17.1
rsa==4.9
six==1.16.0
smmap==5.0.1
streamlit==1.30.0
tenacity==8.2.3
toml==0.10.2
toolz==0.12.0
tornado==6.4
tqdm==4.66.1
typing_extensions==4.9.0
tzdata==2023.4
tzlocal==5.2
urllib3==2.1.0
validators==0.22.0
watchdog==3.0.0
yarl==1.9.4
zipp==3.17.0
settings.py
ADDED
@@ -0,0 +1,4 @@
from pathlib import Path
HF_API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v2"
BASE_DIR = Path(__file__).parent
DATA_DIR = Path(BASE_DIR).joinpath("data")
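DATA_DIR points at a data/ folder next to settings.py; hf_model.convert_video_to_wav writes the extracted WAV there, and moviepy presumably will not create a missing directory. A small sketch that creates the folder at import time (an assumption, not something this commit does):

from pathlib import Path

BASE_DIR = Path(__file__).parent
DATA_DIR = BASE_DIR.joinpath("data")
DATA_DIR.mkdir(exist_ok=True)  # make sure the WAV output directory exists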