import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import re

# Load the model and tokenizer.
# Qwen2.5-0.5B is a public model, so no auth token is required; for gated models,
# read a token from the environment (e.g. os.environ["HF_TOKEN"]) rather than
# hardcoding secrets in source.
model_name = "Qwen/Qwen2.5-0.5B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# System prompt
system_prompt = """You are BlackBerry, an advanced AI model with the "Little Thinking" technique. You use four "Berry" thinkers to analyze queries and provide accurate responses."""

def generate_response(prompt, max_new_tokens=100):
    full_prompt = f"{system_prompt}\n\nUser: {prompt}\n\nBlackBerry:"
    inputs = tokenizer(full_prompt, return_tensors="pt", truncation=True, max_length=512)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,  # budget for generated tokens only, independent of prompt length
            num_return_sequences=1,
            do_sample=True,  # sampling must be enabled for temperature to take effect
            temperature=0.7,
            pad_token_id=tokenizer.eos_token_id,  # avoid the missing-pad-token warning
        )
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Keep only the text generated after the final "BlackBerry:" marker
    return response.split("BlackBerry:")[-1].strip()

def little_thinking(prompt):
    # Run four "Berry" thinkers, each producing a short independent analysis of the query
    thoughts = []
    for i in range(4):
        analysis = generate_response(f"As Berry-{i+1}, briefly analyze: {prompt}", max_new_tokens=50)
        thoughts.append(f"*Berry-{i+1}: {analysis}*\n\n")
    return "".join(thoughts)

def reviewer_thinking(prompt):
    review = generate_response(f"As a Reviewer, briefly check: {prompt}", max_new_tokens=50)
    return f"*Reviewer: {review}*\n\n"

def second_reviewer_thinking(prompt):
    review = generate_response(f"As a Second Reviewer, briefly verify: {prompt}", max_new_tokens=50)
    return f"*Second Reviewer: {review}*\n\n"

def blackberry_response(prompt):
    response = "BlackBerry: Analyzing with Little Thinking technique.\n\n"

    # Little Thinking process
    response += little_thinking(prompt)

    # Initial answer
    response += f"BlackBerry: Initial answer:\n{generate_response(prompt, max_new_tokens=100)}\n\n"

    # Reviewer pass (sees the accumulated transcript so far)
    response += reviewer_thinking(response)

    # Second Reviewer for hard questions
    if re.search(r'\b(physics|science|coordinate|hard|difficult)\b', prompt, re.IGNORECASE):
        response += second_reviewer_thinking(response)

    # Final answer
    response += f"BlackBerry: Final answer:\n{generate_response(prompt, max_new_tokens=150)}"

    return response

# Create the Gradio interface
iface = gr.Interface(
    fn=blackberry_response,
    inputs=gr.Textbox(lines=5, label="Enter your query"),
    outputs=gr.Textbox(label="BlackBerry's Response"),
    title="Blackberry-1 LLM",
    description="Powered by Qwen/Qwen2.5-0.5B with 'Little Thinking' technique",
)

# Launch the app
iface.launch()
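
# --- Optional: querying the running app programmatically ---
# A minimal sketch using gradio_client (install with `pip install gradio_client`).
# Assumptions: the app above is already running locally on Gradio's default URL
# (http://127.0.0.1:7860), and "/predict" is the default api_name Gradio assigns
# to a single-function Interface. Kept commented out because iface.launch()
# blocks this process; run it from a separate script or shell.
#
# from gradio_client import Client
#
# client = Client("http://127.0.0.1:7860/")
# result = client.predict("Explain gravity in simple terms.", api_name="/predict")
# print(result)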