import requests
from threading import Thread
from functools import lru_cache

import torch
import gradio as gr
from bs4 import BeautifulSoup
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

# Model loading (done once at startup)
MODEL_ID = "meta-llama/Llama-3.2-3B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float16,
    device_map="auto",
    low_cpu_mem_usage=True,
).eval()

# Example prompts shown in the UI (image and video examples removed: the
# text-only model can neither create pictures nor watch videos)
EXAMPLES = [
    [{"text": "What is Friction? Explain in Detail."}],
    [{"text": "Write me a Python function to generate unique passwords."}],
    [{"text": "What's the latest price of Bitcoin?"}],
    [{"text": "Search and give me a list of spaces trending on HuggingFace."}],
]

# Bot avatar image shown next to assistant messages
BOT_AVATAR = "OpenAI_logo.png"


@lru_cache(maxsize=128)
def extract_text_from_webpage(html_content):
    """Extracts visible text from HTML content using BeautifulSoup."""
    soup = BeautifulSoup(html_content, "html.parser")
    for tag in soup(["script", "style", "header", "footer", "nav", "form", "svg"]):
        tag.extract()
    return soup.get_text(separator=" ", strip=True)


def search(query):
    """Performs a Google search and returns the visible text of the top results."""
    all_results = []
    max_chars_per_page = 8000  # cap per-page text so the summary stays manageable
    with requests.Session() as session:
        resp = session.get(
            url="https://www.google.com/search",
            headers={"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/111.0"},
            params={"q": query, "num": 4},
            timeout=5,
            verify=False,
        )
        resp.raise_for_status()
        soup = BeautifulSoup(resp.text, "html.parser")
        for result in soup.find_all("div", attrs={"class": "g"}):
            link_tag = result.find("a", href=True)
            if not link_tag:
                continue  # skip results without a usable link
            link = link_tag["href"]
            try:
                webpage = session.get(link, headers={"User-Agent": "Mozilla/5.0"}, timeout=5, verify=False)
                webpage.raise_for_status()
                visible_text = extract_text_from_webpage(webpage.text)
                all_results.append({"link": link, "text": visible_text[:max_chars_per_page]})
            except requests.exceptions.RequestException:
                all_results.append({"link": link, "text": None})
    return all_results


def generate_response(prompt, chat_history):
    """Streams a response from the local model, conditioned on the chat history."""
    messages = []
    for user, assistant in chat_history:
        messages.append({"role": "user", "content": user})
        messages.append({"role": "assistant", "content": assistant})
    messages.append({"role": "user", "content": prompt})
    input_ids = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    # Run generation on a background thread so tokens can be streamed as they arrive
    thread = Thread(
        target=model.generate,
        kwargs={
            "input_ids": input_ids,
            "max_new_tokens": 512,
            "do_sample": True,
            "top_p": 0.95,
            "temperature": 0.8,
            "streamer": streamer,
        },
    )
    thread.start()
    response = ""
    for new_text in streamer:
        response += new_text
        yield response
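
# Optional sanity check for the streaming generator above. This helper is an
# illustrative addition, not part of the original app flow: it runs a single
# prompt through generate_response and prints the partial responses as they
# stream in. Call it manually (e.g. from a REPL); invoking it at import time
# would block startup while the model generates.
def _debug_stream(prompt="Briefly explain friction."):
    last = ""
    for partial in generate_response(prompt, chat_history=[]):
        # Each yield is the full response so far; print only the new suffix
        print(partial[len(last):], end="", flush=True)
        last = partial
    print()
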
def process_query(query, chat_history):
    """Decides which function should handle the query.

    The tool schema below documents the available functions; the actual routing
    is a simple keyword heuristic that you can expand with your own logic
    (e.g. letting the LLM pick a tool from this schema).
    """
    functions_metadata = [
        {
            "type": "function",
            "function": {
                "name": "web_search",
                "description": "Search query on google and find latest information.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "query": {"type": "string", "description": "Web search query"}
                    },
                    "required": ["query"],
                },
            },
        },
        {
            "type": "function",
            "function": {
                "name": "general_query",
                "description": "Reply general query with LLM.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "prompt": {"type": "string", "description": "A detailed prompt"}
                    },
                    "required": ["prompt"],
                },
            },
        },
        {
            "type": "function",
            "function": {
                "name": "hard_query",
                "description": "Reply tough query using powerful LLM.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "prompt": {"type": "string", "description": "A detailed prompt"}
                    },
                    "required": ["prompt"],
                },
            },
        },
    ]

    # Simple keyword-based routing (customize as needed)
    if "search" in query.lower():
        function_name = "web_search"
    elif "explain" in query.lower() or "detail" in query.lower():
        function_name = "general_query"
    else:
        function_name = "hard_query"

    return {
        "name": function_name,
        "arguments": {"query" if function_name == "web_search" else "prompt": query},
    }


def handle_functions(function_call, chat_history):
    """Dispatches a function call produced by process_query."""
    function_name = function_call["name"]
    arguments = function_call["arguments"]
    if function_name == "web_search":
        web_results = search(arguments["query"])
        # Summarize the fetched pages and return them directly as the reply
        web_summary = "\n\n".join(
            f"Link: {res['link']}\nText: {res['text']}"
            for res in web_results
            if res["text"]
        )
        return f"Here are the search results:\n{web_summary}"
    elif function_name in ("general_query", "hard_query"):
        # Return the streaming generator from the local model
        return generate_response(arguments["prompt"], chat_history)
    else:
        return "Function not recognized."


def model_inference(user_prompt, chat_history):
    """Entry point for the chat UI: routes the prompt and streams the reply."""
    prompt = user_prompt["text"]
    function_call = process_query(prompt, chat_history)

    if function_call["name"] == "web_search":
        yield "Performing web search..."
        yield handle_functions(function_call, chat_history)
    elif function_call["name"] in ("general_query", "hard_query"):
        yield "Generating response..."
        for response in handle_functions(function_call, chat_history):
            yield response
    else:
        yield "Invalid function call."


# Chatbot interface
chatbot = gr.Chatbot(
    label="OpenGPT-4o",
    avatar_images=(None, BOT_AVATAR),
    show_copy_button=True,
    layout="panel",
    height=400,
)

iface = gr.ChatInterface(
    fn=model_inference,
    chatbot=chatbot,
    textbox=gr.MultimodalTextbox(label="Prompt"),
    multimodal=True,
    examples=EXAMPLES,
    title="OpenGPT-4o Chatbot",
    description="A powerful AI assistant using a local Llama-3.2 model.",
)

if __name__ == "__main__":
    iface.launch()
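
# Optional: query the running app from another process with gradio_client (a
# sketch, assuming the default local address and ChatInterface's standard
# "/chat" endpoint; install with `pip install gradio_client`):
#
#     from gradio_client import Client
#     client = Client("http://127.0.0.1:7860/")
#     reply = client.predict({"text": "What's the latest price of Bitcoin?"}, api_name="/chat")
#     print(reply)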