import functools
import os
import re

import gradio as gr
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain_community.vectorstores import Qdrant
from langchain_groq import ChatGroq
from pytubefix import YouTube
from qdrant_client import QdrantClient

# Function to extract the transcript text
def get_text(video_id):
    yt = YouTube(video_id)
    caption = yt.captions.get_by_language_code('en')
    transcript = caption.generate_srt_captions()

    # Split the transcript into lines
    lines = transcript.splitlines()

    # Extract text from every third line (lines 3, 6, 9, ...)
    extracted_text = " ".join(lines[i] for i in range(2, len(lines), 4))

    return extracted_text

# Function to create the Qdrant database
def create_qdrant_database(url):
    text = get_text(url)
    
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=10000,
        chunk_overlap=1000
    )

    docs = text_splitter.split_text(text)

    model_name = 'BAAI/bge-large-en'
    model_kwargs = {'device': 'cpu'}
    encode_kwargs = {'normalize_embeddings': False}
    embeddings = HuggingFaceBgeEmbeddings(
        model_name=model_name,
        model_kwargs=model_kwargs,
        encode_kwargs=encode_kwargs
    )

    collection_name = "Youtube_Videos"
    
    qdrant_url = "https://ec1c2790-c2e2-4c78-943f-5f9772492b2e.europe-west3-0.gcp.cloud.qdrant.io:6333"
    api_key = "zIUUg_1QTtjSmCLNEpKnxJZeedKuh635c-YgGkDbI5EJ0ITjpOSyqw"

    qdrant = Qdrant.from_texts(
        texts=docs,
        embedding=embeddings,
        url=qdrant_url,
        prefer_grpc=False,
        collection_name=collection_name,
        api_key=api_key,
        timeout=50
    )

    return "Qdrant database created" 

# Function to answer questions based on the created Qdrant database
def get_answer(question):
    qdrant_url = "https://ec1c2790-c2e2-4c78-943f-5f9772492b2e.europe-west3-0.gcp.cloud.qdrant.io:6333"
    api_key = "zIUUg_1QTtjSmCLNEpKnxJZeedKuh635c-YgGkDbI5EJ0ITjpOSyqw"

    # Initialize the embeddings and Qdrant client
    model_name = 'BAAI/bge-large-en'
    model_kwargs = {'device': 'cpu'}
    encode_kwargs = {'normalize_embeddings': False}
    embeddings = HuggingFaceBgeEmbeddings(
        model_name=model_name,
        model_kwargs=model_kwargs,
        encode_kwargs=encode_kwargs
    )

    client = QdrantClient(
        url=qdrant_url,
        prefer_grpc=False,
        api_key=api_key,
        timeout=50
    )
    collection_name = "Youtube_Videos"
    db = Qdrant(
        client=client,
        embeddings=embeddings,
        collection_name=collection_name,
    
    )

    # Initialize ChatGroq model
    api_key = "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"
    model_name = "llama-3.1-70b-versatile"
    model = ChatGroq(api_key=api_key, model=model_name, temperature=0)

    # Search for the relevant document and generate the answer
    docs = db.similarity_search_with_score(query=question, k=1)
    for doc, score in docs:
        return model.invoke(f"{question} : {doc.page_content}")

# Gradio Interface
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            url_input = gr.Textbox(label="YouTube Video URL")
            output_text = gr.Textbox(label="Result")
            run_button = gr.Button("Create Qdrant Database")
            run_button.click(fn=create_qdrant_database, inputs=url_input, outputs=output_text)
        
        with gr.Column():
            question_input = gr.Textbox(label="Ask a Question")
            answer_output = gr.Textbox(label="Answer")
            ask_button = gr.Button("Get Answer")
            ask_button.click(fn=get_answer, inputs=question_input, outputs=answer_output)

demo.launch()