import os

import gradio as gr
import faiss
import numpy as np
import openai
from sentence_transformers import SentenceTransformer
from nltk.tokenize import sent_tokenize
import nltk

# Fetch the NLTK resources this script requests at startup.
for resource in ("punkt", "punkt_tab"):
    nltk.download(resource)

# Locations of the prebuilt FAISS index and the plain-text manual.
faiss_path = "manual_chunked_faiss_index_500.bin"
manual_path = "ubuntu_manual.txt"

# Read the whole manual into memory; it is chunked further below.
try:
    with open(manual_path, "r", encoding="utf-8") as file:
        full_text = file.read()
except FileNotFoundError as err:
    # Chain explicitly so the original OS-level error stays attached as
    # __cause__ instead of appearing as an unrelated secondary failure.
    raise FileNotFoundError(f"The file {manual_path} was not found.") from err

# Split text into sentence-aligned chunks
def chunk_text(text, chunk_size=500):
    """Group the sentences of *text* into space-joined chunks.

    Sentences come from ``sent_tokenize``. A sentence is added to the chunk
    being built while ``(sentences already in the chunk) + (words in the
    sentence)`` does not exceed *chunk_size*; otherwise the chunk is closed
    and the sentence starts a new one.

    NOTE(review): the size check counts *sentences* already collected, not
    words, so a chunk can hold more than *chunk_size* words. If the very
    first sentence alone exceeds the limit, an empty string is emitted as the
    first chunk. Both quirks are kept on purpose: changing them moves chunk
    boundaries, and the positions stored in the prebuilt FAISS index
    presumably refer to chunks produced this way -- confirm before fixing,
    and rebuild the index if you do.

    Args:
        text: The full text to split.
        chunk_size: Size limit used by the check described above.

    Returns:
        A list of chunk strings in document order.
    """
    finished = []
    pending = []

    for sent in sent_tokenize(text):
        word_count = len(sent.split())
        if len(pending) + word_count > chunk_size:
            # Close the chunk in progress and start a new one with `sent`.
            finished.append(" ".join(pending))
            pending = [sent]
            continue
        pending.append(sent)

    # Flush whatever is left after the last sentence.
    if pending:
        finished.append(" ".join(pending))

    return finished

# Chunk the manual; retrieve_chunks maps FAISS result ids onto positions in
# this list.
manual_chunks = chunk_text(full_text, chunk_size=500)

# Load the prebuilt FAISS index from disk.
try:
    index = faiss.read_index(faiss_path)
except Exception as e:
    # Chain explicitly so the underlying FAISS/IO error is kept as __cause__.
    raise RuntimeError(f"Failed to load FAISS index: {e}") from e

# Load the tokenizer, the raw transformer and the sentence embedder, all from
# the same checkpoint.
from transformers import AutoTokenizer, AutoModel

_CHECKPOINT = "microsoft/MiniLM-L12-H384-uncased"

tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)
# NOTE(review): `model` is not referenced by any other code visible in this
# file; it is kept because other modules may import it.
model = AutoModel.from_pretrained(_CHECKPOINT)

embedding_model = SentenceTransformer(_CHECKPOINT)

# OpenAI API key
# Read the key from the environment rather than embedding it in source, where
# it leaks to anyone with access to the file or its history.
# NOTE(review): the key that was previously committed here must be treated as
# compromised and revoked.
openai.api_key = os.environ.get("OPENAI_API_KEY")
if not openai.api_key:
    raise RuntimeError("The OPENAI_API_KEY environment variable is not set.")

# Embed a batch of texts
def embed_text(text_list):
    """Encode *text_list* with the module-level ``embedding_model``.

    Args:
        text_list: The texts to embed (``retrieve_chunks`` passes a
            one-element list).

    Returns:
        A new ``float32`` NumPy array built from the encoder output; this is
        what ``retrieve_chunks`` hands to ``index.search``.
    """
    vectors = embedding_model.encode(text_list)
    return np.array(vectors, dtype=np.float32)

# Function to retrieve relevant chunks for a user query
def retrieve_chunks(query, k=5):
    """Look up the manual chunks closest to *query* in the FAISS index.

    Args:
        query: The user's question as a string.
        k: Number of nearest neighbours to request from the index.

    Returns:
        A tuple ``(relevant_chunks, distances, indices)``. ``relevant_chunks``
        is a list of chunk strings (possibly empty); ``distances`` and
        ``indices`` are the arrays returned by ``index.search``, unmodified.

    Raises:
        RuntimeError: If the FAISS search itself fails.
    """
    query_embedding = embed_text([query])

    try:
        distances, indices = index.search(query_embedding, k=k)
    except Exception as e:
        raise RuntimeError(f"FAISS search failed: {e}") from e

    # Diagnostic output kept from the original implementation.
    print("Distances:", distances)
    print("Indices:", indices)

    # FAISS fills missing neighbours with -1. A negative id would silently
    # wrap around to the end of the list and return an unrelated chunk, so ids
    # must be checked against both bounds.
    chunk_count = len(manual_chunks)
    relevant_chunks = [
        manual_chunks[i] for i in indices[0] if 0 <= i < chunk_count
    ]
    return relevant_chunks, distances, indices

# Encode text with truncation
def truncate_input(text, max_length=16385):
    """Encode *text* with the module-level ``tokenizer``, truncating it.

    Despite the name, this returns whatever ``tokenizer.encode`` produces with
    ``return_tensors="pt"`` (token ids), not a truncated string.

    Args:
        text: The text to encode.
        max_length: Maximum number of tokens passed to the tokenizer.

    Returns:
        The encoded token ids.
    """
    return tokenizer.encode(
        text,
        truncation=True,
        max_length=max_length,
        return_tensors="pt",
    )

# Function to perform RAG: Retrieve chunks and generate a response
def rag_response(query, k=5, max_tokens=150, context_limit=16385):
    """Answer *query* using retrieved manual chunks as context.

    Args:
        query: The user's question.
        k: Number of chunks to retrieve.
        max_tokens: Maximum number of tokens requested for the completion.
        context_limit: Total token budget shared by prompt and completion.

    Returns:
        A tuple ``(text, distances, indices)``. ``text`` is the generated
        answer, a fallback message, or an error description. ``distances`` and
        ``indices`` come from ``retrieve_chunks``, or are empty lists when the
        query is blank or an error occurred.
    """
    # Skip retrieval and the paid API call when there is nothing to answer.
    if not query or not query.strip():
        return "Please enter a question.", [], []

    try:
        relevant_chunks, distances, indices = retrieve_chunks(query, k=k)

        if not relevant_chunks:
            return "Sorry, I couldn't find relevant information.", distances, indices

        # Combine the query with retrieved chunks
        augmented_input = query + "\n\n" + "\n\n".join(relevant_chunks)

        # Leave room for the completion inside the shared context budget.
        # NOTE(review): the count comes from the embedding model's tokenizer,
        # not the chat model's, so it is only an approximation.
        prompt_budget = max(context_limit - max_tokens, 0)
        # Encode without special tokens so that a truncated prompt does not
        # get tokenizer markers decoded back into its text.
        token_ids = tokenizer.encode(augmented_input, add_special_tokens=False)
        if len(token_ids) > prompt_budget:
            augmented_input = tokenizer.decode(
                token_ids[:prompt_budget], skip_special_tokens=True
            )

        # Generate response using OpenAI API
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": augmented_input}
            ],
            max_tokens=max_tokens,
            temperature=0.7
        )
        generated_text = response.choices[0].message['content'].strip()
        return generated_text, distances, indices
    except Exception as e:
        # UI boundary: report the failure as text instead of crashing the app.
        return f"An error occurred: {e}", [], []

# Gradio interface helpers
def format_output(response, distances, indices):
    """Render a response and its retrieval diagnostics as one string.

    Args:
        response: The text to show as the answer.
        distances: Distances to display (formatted with ``str``).
        indices: Indices to display (formatted with ``str``).

    Returns:
        The three labelled sections separated by blank lines.
    """
    sections = (
        f"Response: {response}",
        f"Distances: {distances}",
        f"Indices: {indices}",
    )
    return "\n\n".join(sections)

def _answer_query(query):
    """Run the RAG pipeline for *query* and render the result as one string.

    ``rag_response`` returns a ``(text, distances, indices)`` tuple, while the
    interface below declares a single text output; ``format_output`` folds the
    three values into that one string.
    """
    return format_output(*rag_response(query))


iface = gr.Interface(
    fn=_answer_query,
    inputs="text",
    outputs="text",
    title="RAG Chatbot with FAISS and GPT-3.5",
    description="Ask me anything!",
    # Live mode re-runs the function whenever the input changes, which would
    # send an OpenAI request for every partial query; require an explicit
    # submit instead.
    live=False,
)

if __name__ == "__main__":
    iface.launch()