jorgik1 committed on
Commit
a3f5902
1 Parent(s): 054592e

Upload 6 files

Browse files
Files changed (6) hide show
  1. __init__.py +0 -0
  2. chatbot.py +92 -0
  3. faq.py +15 -0
  4. prompts.py +23 -0
  5. requirements.txt +15 -0
  6. sidebar.py +27 -0
__init__.py ADDED
File without changes
chatbot.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.document_loaders import YoutubeLoader
2
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
3
+ from langchain.vectorstores import FAISS
4
+ from langchain.embeddings import HuggingFaceEmbeddings
5
+ from langchain.llms import HuggingFaceHub
6
+ from langchain.chains import LLMChain
7
+ from dotenv import find_dotenv, load_dotenv
8
+ from prompts import CHAT_PROMPT
9
+ from youtube_transcript_api import NoTranscriptFound
10
+ import streamlit as st
11
+ import os
12
+
13
+
14
class YouTubeChatbot:
    """Answer questions about a YouTube video using its transcript.

    The transcript is split into chunks, embedded with
    ``HuggingFaceEmbeddings`` and indexed in FAISS.  A question is answered
    by the hosted ``tiiuae/falcon-7b-instruct`` model, which receives the
    chunks most similar to the question as context.

    Problems are reported to the user through ``st.error``; the affected
    attribute or return value is ``None`` in that case.
    """

    def __init__(self):
        """Load credentials, then the embeddings model and the LLM client.

        ``self.embeddings`` / ``self.falcon_llm`` are set to ``None`` when
        the corresponding model cannot be initialised.
        """
        load_dotenv(find_dotenv())

        # A token that is already in the environment (e.g. loaded from .env
        # above) takes precedence, exactly as with ``setdefault``.  Only when
        # it is missing do we consult ``st.secrets``: attribute access on a
        # missing key raises, and a missing secrets file raises
        # FileNotFoundError, so the lookup has to be tolerant.
        if not os.environ.get("HUGGINGFACEHUB_API_TOKEN"):
            try:
                api_key = st.secrets.get("hugging_face_api_key")
            except FileNotFoundError:
                api_key = None
            if api_key:
                os.environ["HUGGINGFACEHUB_API_TOKEN"] = api_key

        try:
            self.embeddings = HuggingFaceEmbeddings()
        except Exception as e:
            st.error("Failed to load the Hugging Face Embeddings model: " +
                     str(e))
            self.embeddings = None

        try:
            repo_id = "tiiuae/falcon-7b-instruct"
            self.falcon_llm = HuggingFaceHub(
                repo_id=repo_id,
                model_kwargs={"temperature": 0.1, "max_new_tokens": 500},
            )
        except Exception as e:
            st.error("Failed to load the Falcon LLM model: " + str(e))
            self.falcon_llm = None

    # ``cache_resource`` keeps one shared vector store per URL instead of
    # pickling and copying it on every access as ``cache_data`` does.  The
    # leading underscore on ``_self`` keeps the instance out of the cache key.
    # NOTE: a ``None`` result is cached as well; call ``.clear()`` on this
    # method to retry a URL that failed.
    @st.cache_resource
    def create_db_from_youtube_video_url(_self, video_url):
        """Build a FAISS index over the transcript of ``video_url``.

        Args:
            video_url: URL of the YouTube video to index.

        Returns:
            The FAISS vector store, or ``None`` when the embeddings model is
            unavailable, the video has no transcript, or indexing fails.
        """
        if _self.embeddings is None:
            st.error(
                "Embeddings model is not loaded. Please check the error messages."
            )
            return None

        st.info("Creating FAISS database from YouTube video.")
        loader = YoutubeLoader.from_youtube_url(video_url)
        try:
            transcript = loader.load()
        except NoTranscriptFound:
            st.error("No transcript found for the video.")
            return None

        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000,
                                                       chunk_overlap=100)
        docs = text_splitter.split_documents(transcript)
        st.info("Number of documents: " + str(len(docs)))

        try:
            db = FAISS.from_documents(docs, _self.embeddings)
        except Exception as e:
            st.error("Failed to create FAISS database from documents: " +
                     str(e))
            return None

        st.text("Created FAISS database from documents.")
        return db

    def get_response_from_query(_self, _db, query, k=4):
        """Answer ``query`` from the ``k`` most similar transcript chunks.

        Args:
            _db: Vector store returned by
                ``create_db_from_youtube_video_url`` (may be ``None``).
            query: The user's question.
            k: Number of transcript chunks passed to the model as context.

        Returns:
            The model's answer with line breaks replaced by spaces, or
            ``None`` when a prerequisite is missing or generation fails.
        """
        if _db is None:
            st.error(
                "Database is not initialized. Please check the error messages."
            )
            return None

        if _self.falcon_llm is None:
            st.error(
                "Falcon LLM model is not loaded. Please check the error messages."
            )
            return None

        # Errors are handled here, outside the cached helper, so that a
        # transient failure is never stored as the answer for this query.
        try:
            return _self._cached_answer(
                _db, _self._db_fingerprint(_db), query, k
            )
        except Exception as e:
            st.error("Failed to generate a response: " + str(e))
            return None

    @staticmethod
    def _db_fingerprint(db):
        """Return a hashable value identifying the contents of ``db``.

        Uses the store's ``index_to_docstore_id`` values when available
        (presumably random ids assigned when the store is built -- confirm
        against the pinned LangChain version); falls back to ``id(db)``.
        """
        ids = getattr(db, "index_to_docstore_id", None)
        if ids:
            return tuple(ids.values())
        return id(db)

    @st.cache_data
    def _cached_answer(_self, _db, db_fingerprint, query, k):
        """Run retrieval and the LLM chain; cached per (store, query, k).

        ``_db`` is excluded from Streamlit's cache key by its underscore
        prefix, so ``db_fingerprint`` exists solely to make the key depend
        on the video; without it the same question asked about another
        video would return the first video's answer.
        """
        docs = _db.similarity_search(query, k=k)
        docs_page_content = " ".join(d.page_content for d in docs)

        chain = LLMChain(llm=_self.falcon_llm, prompt=CHAT_PROMPT)
        response = chain.run(question=query, docs=docs_page_content)
        # Replace line breaks with spaces (not ""), otherwise the last word
        # of one line is fused with the first word of the next.
        return response.replace("\n", " ").strip()
faq.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+
3
def faq():
    """Render the FAQ section as Markdown in the current Streamlit container."""
    # The text is assembled from adjacent literals with explicit newlines so
    # that source indentation can never leak into the rendered Markdown.
    st.markdown(
        "# FAQ\n"
        "## How does YouTuberGPT work?\n"
        "YouTuberGPT is a powerful tool that can help you quickly and easily "
        "find answers to your questions about YouTube videos. Simply input "
        "the URL of the video you're interested in, and YouTuberGPT will use "
        "its advanced semantic search capabilities to analyze the video and "
        "generate accurate and helpful answers to your questions.\n"
        "\n"
        "## Are the answers always accurate?\n"
        # The app generates answers with Falcon-7B-Instruct, not GPT-3.
        "While YouTuberGPT uses an open-source large language model "
        "(Falcon-7B-Instruct) to generate answers, there may be some "
        "instances where the answers are not entirely accurate. However, "
        "for the vast majority of use cases, YouTuberGPT is extremely "
        "accurate and can provide valuable insights and information.\n"
        "\n"
        "So go ahead and try out YouTuberGPT for yourself - we're confident "
        "that you'll find it to be an incredibly helpful tool for all your "
        "YouTube-related questions and needs!\n"
    )
15
+
prompts.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.prompts import (
2
+ SystemMessagePromptTemplate,
3
+ HumanMessagePromptTemplate,
4
+ ChatPromptTemplate)
5
+
6
# System message: frames the assistant's role; ``{docs}`` is the placeholder
# for the transcript text supplied when the prompt is formatted.
template = (
    "\n"
    "You are a helpful assistant that can answer questions about YouTube videos based on their transcripts.\n"
    "\n"
    "To provide accurate answers, please refer to the factual information in the video transcript: {docs}\n"
    "\n"
    "If you don't have enough information to answer the question, please respond with \"I don't know\".\n"
    "\n"
    "Your answers should be detailed and provide as much information as possible.\n"
)
PROMPT_TEMPLATE = SystemMessagePromptTemplate.from_template(template)

# Human message: carries the user's question via ``{question}``.
human_template = "Answer the following question: {question}"
HUMAN_PROMPT = HumanMessagePromptTemplate.from_template(human_template)

# Full chat prompt: system instructions first, then the question.
CHAT_PROMPT = ChatPromptTemplate.from_messages([PROMPT_TEMPLATE, HUMAN_PROMPT])
requirements.txt ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ streamlit==1.22.0
2
+ langchain==0.0.177
3
+ faiss-cpu==1.7.3
4
+ openai==0.26.2
5
+ tiktoken==0.4.0
6
+ streamlit_elements==0.1
7
+ youtube-transcript-api==0.6.0
8
+ python-dotenv==0.21.1
9
+ pytest==7.2.1
10
+ isort==5.12.0
11
+ black==23.1a1
12
+ flake8==6.0.0
13
+ streamlit_chat==0.0.2.2
14
+ huggingface_hub==0.15.1
15
+ sentence_transformers
sidebar.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from components.faq import faq
3
+
4
def sidebar():
    """Render the app sidebar: title, usage notes, about, FAQ and credits."""
    with st.sidebar:
        st.markdown("# 🤖YoutuberGPT")
        st.markdown("---")
        st.markdown("# How to use?")
        # Adjacent string literals are concatenated without a separator, so
        # each bullet needs its own trailing newline to render as a separate
        # Markdown list item.
        st.markdown(
            "- Input the URL of the video you are interested in\n"
            "- YouTuberGPT will use its advanced semantic search "
            "capabilities to analyze the video and generate accurate and "
            "helpful answers to your questions"
        )

        st.markdown("# About")
        st.markdown(
            "YouTuberGPT allows you to ask questions about YouTube videos. "
        )
        faq()
        st.markdown(
            "This tool is a work in progress. "
            "You can contribute to the project on [GitHub](https://github.com/jorgik1/youtuber_ai_chatbot) "  # noqa: E501
            "with your feedback and suggestions💡"
        )
        st.markdown("Made by [jorgik1](https://github.com/jorgik1)")
        st.markdown("---")
        st.markdown("# Donate")
        st.markdown("[Buy me a coffee](https://www.buymeacoffee.com/youtubergtp)")