# Spaces:
# Sleeping
# Sleeping
from functools import lru_cache

import gradio as gr
from dotenv import load_dotenv
from langchain.chains import LLMChain
from langchain.document_loaders import YoutubeLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
# Load variables from a local .env file into the process environment.
# NOTE(review): presumably this supplies the OpenAI API key that the
# embeddings client below needs, so it must run first -- confirm.
load_dotenv()

# Shared embeddings client used when building the FAISS index.
embeddings = OpenAIEmbeddings()

# Sample video URL; only referenced by the commented-out debug call below.
video_url = "https://www.youtube.com/watch?v=PfTOr3ONKzU"
@lru_cache(maxsize=8)
def create_vector_db_from_youtube_url(video_url: str):
    """Build a FAISS index over the transcript of a YouTube video.

    The transcript is loaded with ``YoutubeLoader``, split into chunks of
    1000 characters with a 100-character overlap, and embedded with the
    module-level ``embeddings`` client.

    Results are cached per URL (up to 8 videos) so that asking several
    questions about the same video does not re-download and re-embed the
    transcript each time. The cached index is shared between calls, so
    callers should treat it as read-only.

    Args:
        video_url: URL of the YouTube video to index.

    Returns:
        A FAISS vector store containing the transcript chunks.

    Raises:
        ValueError: If no transcript text could be loaded for the video.
    """
    loader = YoutubeLoader.from_youtube_url(video_url)
    transcript = loader.load()

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    docs = text_splitter.split_documents(transcript)
    if not docs:
        # Without this check an empty document list is handed to FAISS, which
        # is expected to fail with a much less helpful error.
        raise ValueError(f"No transcript available for video: {video_url}")

    return FAISS.from_documents(docs, embeddings)
# create_vector_db_from_youtube_url(video_url) | |
def get_response_from_query(db, query, k=4):
    """Answer a question using the transcript chunks stored in ``db``.

    The ``k`` chunks most similar to ``query`` are retrieved, joined into a
    single string, and passed to an OpenAI completion model together with
    the question.

    Args:
        db: Vector store exposing ``similarity_search(query, k=...)`` whose
            results have a ``page_content`` attribute (for example the index
            returned by ``create_vector_db_from_youtube_url``).
        query: The user's question about the video.
        k: Number of transcript chunks to retrieve as context.

    Returns:
        The model's answer with newlines replaced by spaces.
    """
    docs = db.similarity_search(query, k=k)
    docs_page_content = " ".join(d.page_content for d in docs)

    # text-davinci-003 has been retired by OpenAI; gpt-3.5-turbo-instruct is
    # the completion-style replacement and works with the same OpenAI wrapper.
    llm = OpenAI(model_name="gpt-3.5-turbo-instruct")

    prompt = PromptTemplate(
        input_variables=["question", "docs"],
        template="""
        You are a helpful YouTube assistant that can answer questions about videos based on the video transcript.
        Answer the following question: {question}
        By searching the following video transcript: {docs}
        Only use the factual information from the transcript to answer the question.
        If you feel like you don't have enough information to answer the question, say "I don't know".
        Your answer should be detailed.
        """,
    )

    chain = LLMChain(llm=llm, prompt=prompt)
    response = chain.run(question=query, docs=docs_page_content)
    # Collapse the answer onto one line for display.
    return response.replace("\n", " ")
def gradio_interface(youtube_url, query):
    """Gradio callback: answer ``query`` about the video at ``youtube_url``.

    Args:
        youtube_url: URL of the YouTube video entered by the user.
        query: The user's question about the video.

    Returns:
        The generated answer, or a short prompt asking for the missing input
        when either field is empty or only whitespace.
    """
    youtube_url = (youtube_url or "").strip()
    query = (query or "").strip()

    # Guard clause: without both inputs there is nothing to look up, so tell
    # the user instead of returning nothing. The message is in Indonesian to
    # match the rest of the UI text.
    if not (youtube_url and query):
        return "Harap masukkan URL YouTube dan pertanyaan."

    db = create_vector_db_from_youtube_url(youtube_url)
    return get_response_from_query(db, query)
# Build the Gradio interface.
iface = gr.Interface(
    fn=gradio_interface,
    inputs=["text", "text"],  # Two text inputs: the YouTube URL and the question
    outputs="text",  # The answer is shown as plain text
    title="YouTube Assistant",
    description="Masukkan URL YouTube dan ajukan pertanyaan tentang video tersebut.",
)

# Start the Gradio app (debug mode on, public share link requested).
iface.launch(debug=True, share=True)