ns-devel committed on
Commit
38efda5
1 Parent(s): 8677234

Added YouTube URL option

Browse files
__pycache__/settings.cpython-311.pyc CHANGED
Binary files a/__pycache__/settings.cpython-311.pyc and b/__pycache__/settings.cpython-311.pyc differ
 
app.py CHANGED
@@ -1,4 +1,6 @@
1
  import streamlit as st
 
 
2
  from lib.services.hf_model import get_transcript
3
  from lib.services.gemini import gemini
4
  from lib.services.openai import get_completion
@@ -9,14 +11,30 @@ def get_cached_transcript(video_url):
9
  return get_transcript(video_url)
10
 
11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  def main():
13
  st.title("VideoClarify")
14
 
15
  # Get video URL from user
16
  video_url = st.text_input("Enter Video URL:", key="video_url")
17
  selected_model = st.sidebar.selectbox("Select Model", ["Gemini", "OpenAI"])
18
- print(selected_model)
19
- if video_url:
 
20
  st.video(video_url)
21
  # Get transcript from the video
22
  transcript = get_cached_transcript(video_url)
 
1
  import streamlit as st
2
+ from pytube import YouTube
3
+ from settings import DATA_DIR
4
  from lib.services.hf_model import get_transcript
5
  from lib.services.gemini import gemini
6
  from lib.services.openai import get_completion
 
11
  return get_transcript(video_url)
12
 
13
 
14
def download_youtube_video(video_url):
    """Download a YouTube video into DATA_DIR at its highest resolution.

    Parameters
    ----------
    video_url : str
        URL of the YouTube video to download.

    Returns
    -------
    str | None
        Filesystem path of the downloaded video, or None if the
        download failed for any reason.
    """
    try:
        yt = YouTube(video_url)
        # Highest-resolution stream offered by pytube for this video.
        video_stream = yt.streams.get_highest_resolution()
        # Save into the project's data directory and return the file path.
        return video_stream.download(DATA_DIR)
    except Exception as e:
        # Best-effort contract: the caller handles None, but report the
        # error instead of swallowing it silently (original bound `e`
        # and never used it, hiding every failure cause).
        print(f"Failed to download {video_url!r}: {e}")
        return None
27
+
28
+
29
  def main():
30
  st.title("VideoClarify")
31
 
32
  # Get video URL from user
33
  video_url = st.text_input("Enter Video URL:", key="video_url")
34
  selected_model = st.sidebar.selectbox("Select Model", ["Gemini", "OpenAI"])
35
+ if len(video_url):
36
+ video_url = download_youtube_video(video_url)
37
+ print(video_url)
38
  st.video(video_url)
39
  # Get transcript from the video
40
  transcript = get_cached_transcript(video_url)
lib/services/__pycache__/hf_model.cpython-311.pyc CHANGED
Binary files a/lib/services/__pycache__/hf_model.cpython-311.pyc and b/lib/services/__pycache__/hf_model.cpython-311.pyc differ
 
lib/services/hf_model.py CHANGED
@@ -1,5 +1,7 @@
1
  import os
2
  import requests
 
 
3
  from settings import HF_API_URL, DATA_DIR
4
  from pathlib import Path
5
  from moviepy.editor import VideoFileClip
@@ -19,8 +21,9 @@ def convert_video_to_wav(video_path, output_path):
19
  audio_clip = video_clip.audio
20
  audio_clip.write_audiofile(output_path)
21
 
22
- def get_transcript(filepath):
23
  audio_file = Path(DATA_DIR).joinpath(Path(filepath).stem + ".wav")
 
24
  if not audio_file.exists():
25
  convert_video_to_wav(filepath, audio_file)
26
  headers = {"Authorization": f"Bearer {os.environ['HF_KEY']}"}
@@ -28,4 +31,29 @@ def get_transcript(filepath):
28
  data = f.read()
29
  response = requests.post(HF_API_URL, headers=headers,
30
  data=data)
 
31
  return response.json()["text"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
  import requests
3
+ import torch
4
+ from transformers import pipeline
5
  from settings import HF_API_URL, DATA_DIR
6
  from pathlib import Path
7
  from moviepy.editor import VideoFileClip
 
21
  audio_clip = video_clip.audio
22
  audio_clip.write_audiofile(output_path)
23
 
24
+ def get_transcript1(filepath):
25
  audio_file = Path(DATA_DIR).joinpath(Path(filepath).stem + ".wav")
26
+ print(audio_file)
27
  if not audio_file.exists():
28
  convert_video_to_wav(filepath, audio_file)
29
  headers = {"Authorization": f"Bearer {os.environ['HF_KEY']}"}
 
31
  data = f.read()
32
  response = requests.post(HF_API_URL, headers=headers,
33
  data=data)
34
+ print(response, response.json())
35
  return response.json()["text"]
36
+
37
def get_transcript(url):
    """
    Transcribe an audio file with Whisper and return the full text.

    Parameters
    ----------
    url : str
        Path or URL of the audio file to transcribe.

    Returns
    -------
    str
        Transcript text, concatenated from all timestamped chunks.
    """
    # Use the GPU when available, otherwise fall back to CPU.
    # NOTE(review): the pipeline is rebuilt on every call, which reloads
    # the model each time — consider caching it at module level.
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    pipe = pipeline(
        "automatic-speech-recognition", model="openai/whisper-base", device=device
    )

    # Long-form transcription: 30 s chunks, batched, with timestamps so
    # the result comes back as a list of {"text", "timestamp"} chunks.
    chunks = pipe(
        url,
        max_new_tokens=256,
        generate_kwargs={"task": "transcribe"},
        chunk_length_s=30,
        batch_size=8,
        return_timestamps=True,
    )["chunks"]

    # Join once instead of quadratic `text +=` concatenation in a loop.
    return "".join(chunk["text"] for chunk in chunks)
requirements.txt CHANGED
@@ -8,7 +8,9 @@ certifi==2023.11.17
8
  charset-normalizer==3.3.2
9
  click==8.1.7
10
  decorator==4.4.2
 
11
  frozenlist==1.4.1
 
12
  gitdb==4.0.11
13
  GitPython==3.1.41
14
  google-ai-generativelanguage==0.4.0
@@ -18,6 +20,7 @@ google-generativeai==0.3.2
18
  googleapis-common-protos==1.62.0
19
  grpcio==1.60.0
20
  grpcio-status==1.60.0
 
21
  idna==3.6
22
  imageio==2.33.1
23
  imageio-ffmpeg==0.4.9
@@ -29,8 +32,22 @@ markdown-it-py==3.0.0
29
  MarkupSafe==2.1.4
30
  mdurl==0.1.2
31
  moviepy==1.0.3
 
32
  multidict==6.0.4
 
33
  numpy==1.26.3
 
 
 
 
 
 
 
 
 
 
 
 
34
  openai==0.28.0
35
  packaging==23.2
36
  pandas==2.2.0
@@ -44,20 +61,29 @@ pyasn1-modules==0.3.0
44
  pydeck==0.8.1b0
45
  Pygments==2.17.2
46
  python-dateutil==2.8.2
 
47
  pytz==2023.3.post1
 
48
  referencing==0.32.1
 
49
  requests==2.31.0
50
  rich==13.7.0
51
  rpds-py==0.17.1
52
  rsa==4.9
 
53
  six==1.16.0
54
  smmap==5.0.1
55
  streamlit==1.30.0
 
56
  tenacity==8.2.3
 
57
  toml==0.10.2
58
  toolz==0.12.0
 
59
  tornado==6.4
60
  tqdm==4.66.1
 
 
61
  typing_extensions==4.9.0
62
  tzdata==2023.4
63
  tzlocal==5.2
 
8
  charset-normalizer==3.3.2
9
  click==8.1.7
10
  decorator==4.4.2
11
+ filelock==3.13.1
12
  frozenlist==1.4.1
13
+ fsspec==2023.12.2
14
  gitdb==4.0.11
15
  GitPython==3.1.41
16
  google-ai-generativelanguage==0.4.0
 
20
  googleapis-common-protos==1.62.0
21
  grpcio==1.60.0
22
  grpcio-status==1.60.0
23
+ huggingface-hub==0.20.2
24
  idna==3.6
25
  imageio==2.33.1
26
  imageio-ffmpeg==0.4.9
 
32
  MarkupSafe==2.1.4
33
  mdurl==0.1.2
34
  moviepy==1.0.3
35
+ mpmath==1.3.0
36
  multidict==6.0.4
37
+ networkx==3.2.1
38
  numpy==1.26.3
39
+ nvidia-cublas-cu12==12.1.3.1
40
+ nvidia-cuda-cupti-cu12==12.1.105
41
+ nvidia-cuda-nvrtc-cu12==12.1.105
42
+ nvidia-cuda-runtime-cu12==12.1.105
43
+ nvidia-cudnn-cu12==8.9.2.26
44
+ nvidia-cufft-cu12==11.0.2.54
45
+ nvidia-curand-cu12==10.3.2.106
46
+ nvidia-cusolver-cu12==11.4.5.107
47
+ nvidia-cusparse-cu12==12.1.0.106
48
+ nvidia-nccl-cu12==2.18.1
49
+ nvidia-nvjitlink-cu12==12.3.101
50
+ nvidia-nvtx-cu12==12.1.105
51
  openai==0.28.0
52
  packaging==23.2
53
  pandas==2.2.0
 
61
  pydeck==0.8.1b0
62
  Pygments==2.17.2
63
  python-dateutil==2.8.2
64
+ pytube==15.0.0
65
  pytz==2023.3.post1
66
+ PyYAML==6.0.1
67
  referencing==0.32.1
68
+ regex==2023.12.25
69
  requests==2.31.0
70
  rich==13.7.0
71
  rpds-py==0.17.1
72
  rsa==4.9
73
+ safetensors==0.4.1
74
  six==1.16.0
75
  smmap==5.0.1
76
  streamlit==1.30.0
77
+ sympy==1.12
78
  tenacity==8.2.3
79
+ tokenizers==0.15.0
80
  toml==0.10.2
81
  toolz==0.12.0
82
+ torch==2.1.2
83
  tornado==6.4
84
  tqdm==4.66.1
85
+ transformers==4.36.2
86
+ triton==2.1.0
87
  typing_extensions==4.9.0
88
  tzdata==2023.4
89
  tzlocal==5.2