import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import re

# Load the model and tokenizer.
# Qwen2.5-0.5B is a public model, so no auth token is required; for gated models,
# read a token from the environment (e.g. os.environ["HF_TOKEN"]) rather than
# hardcoding secrets in source.
model_name = "Qwen/Qwen2.5-0.5B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# System prompt
system_prompt = """You are BlackBerry, an advanced AI model with the "Little Thinking" technique. You use four "Berry" thinkers to analyze queries and provide accurate responses."""

def generate_response(prompt, max_new_tokens=100):
    full_prompt = f"{system_prompt}\n\nUser: {prompt}\n\nBlackBerry:"
    inputs = tokenizer(full_prompt, return_tensors="pt", truncation=True, max_length=512)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,  # budget for generated tokens only, independent of prompt length
            num_return_sequences=1,
            do_sample=True,  # sampling must be enabled for temperature to take effect
            temperature=0.7,
            pad_token_id=tokenizer.eos_token_id,  # avoid the missing-pad-token warning
        )
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Keep only the text generated after the final "BlackBerry:" marker
    return response.split("BlackBerry:")[-1].strip()

def little_thinking(prompt):
    # Run four "Berry" thinkers, each producing a short independent analysis of the query
    thoughts = []
    for i in range(4):
        analysis = generate_response(f"As Berry-{i+1}, briefly analyze: {prompt}", max_new_tokens=50)
        thoughts.append(f"*Berry-{i+1}: {analysis}*\n\n")
    return "".join(thoughts)

def reviewer_thinking(prompt):
    review = generate_response(f"As a Reviewer, briefly check: {prompt}", max_new_tokens=50)
    return f"*Reviewer: {review}*\n\n"

def second_reviewer_thinking(prompt):
    review = generate_response(f"As a Second Reviewer, briefly verify: {prompt}", max_new_tokens=50)
    return f"*Second Reviewer: {review}*\n\n"

def blackberry_response(prompt):
    response = "BlackBerry: Analyzing with Little Thinking technique.\n\n"

    # Little Thinking process
    response += little_thinking(prompt)

    # Initial answer
    response += f"BlackBerry: Initial answer:\n{generate_response(prompt, max_new_tokens=100)}\n\n"

    # Reviewer pass (sees the accumulated transcript so far)
    response += reviewer_thinking(response)

    # Second Reviewer for hard questions
    if re.search(r'\b(physics|science|coordinate|hard|difficult)\b', prompt, re.IGNORECASE):
        response += second_reviewer_thinking(response)

    # Final answer
    response += f"BlackBerry: Final answer:\n{generate_response(prompt, max_new_tokens=150)}"

    return response

# Create the Gradio interface
iface = gr.Interface(
    fn=blackberry_response,
    inputs=gr.Textbox(lines=5, label="Enter your query"),
    outputs=gr.Textbox(label="BlackBerry's Response"),
    title="Blackberry-1 LLM",
    description="Powered by Qwen/Qwen2.5-0.5B with 'Little Thinking' technique",
)

# Launch the app
iface.launch()
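
# --- Optional: querying the running app programmatically ---
# A minimal sketch using gradio_client (install with `pip install gradio_client`).
# Assumptions: the app above is already running locally on Gradio's default URL
# (http://127.0.0.1:7860), and "/predict" is the default api_name Gradio assigns
# to a single-function Interface. Kept commented out because iface.launch()
# blocks this process; run it from a separate script or shell.
#
# from gradio_client import Client
#
# client = Client("http://127.0.0.1:7860/")
# result = client.predict("Explain gravity in simple terms.", api_name="/predict")
# print(result)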