BlackBerry-1 / app.py
underwater45's picture
Update app.py
005bde7 verified
raw
history blame
2.64 kB
import os
import re

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Load the model and tokenizer.
# SECURITY FIX: an access token was previously hard-coded here in source.
# Read it from the environment instead (e.g. a Space secret named HF_TOKEN);
# the token that was committed to this file should be revoked immediately.
# None is acceptable for public models such as this one.
model_name = "Qwen/Qwen2.5-0.5B"
hf_token = os.environ.get("HF_TOKEN")
tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)
model = AutoModelForCausalLM.from_pretrained(model_name, token=hf_token)

# System prompt prepended to every user query.
system_prompt = """You are BlackBerry, an advanced AI model with the "Little Thinking" technique. You use four "Berry" thinkers to analyze queries and provide accurate responses."""
def generate_response(prompt, max_length=100):
    """Generate a model reply for *prompt* wrapped in the system scaffold.

    Args:
        prompt: User text inserted into the "User: ... BlackBerry:" template.
        max_length: Maximum number of NEW tokens to generate (the prompt
            itself no longer counts against this budget).

    Returns:
        The decoded reply with everything up to the last "BlackBerry:"
        marker stripped.
    """
    full_prompt = f"{system_prompt}\n\nUser: {prompt}\n\nBlackBerry:"
    inputs = tokenizer(full_prompt, return_tensors="pt", truncation=True, max_length=512)
    with torch.no_grad():
        # BUG FIX: the original passed max_length= (a TOTAL-length cap that
        # includes the prompt), so a long prompt could leave zero room for
        # generation. max_new_tokens budgets only the generated tokens.
        # do_sample=True is required for temperature to have any effect;
        # pad_token_id silences the open-ended-generation warning.
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_length,
            num_return_sequences=1,
            do_sample=True,
            temperature=0.7,
            pad_token_id=tokenizer.eos_token_id,
        )
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response.split("BlackBerry:")[-1].strip()
def little_thinking(prompt):
    """Run the four 'Berry' thinkers over *prompt* and join their analyses."""
    return "".join(
        f"*Berry-{n}: {generate_response(f'As Berry-{n}, briefly analyze: {prompt}', max_length=50)}*\n\n"
        for n in range(1, 5)
    )
def reviewer_thinking(prompt):
    """Ask the model to act as a first-pass reviewer over *prompt*."""
    review = generate_response(f'As a Reviewer, briefly check: {prompt}', max_length=50)
    return f"*Reviewer: {review}*\n\n"
def second_reviewer_thinking(prompt):
    """Ask the model for a second, independent verification pass over *prompt*."""
    verification = generate_response(f'As a Second Reviewer, briefly verify: {prompt}', max_length=50)
    return f"*Second Reviewer: {verification}*\n\n"
def blackberry_response(prompt):
    """Full 'Little Thinking' pipeline: berries, draft answer, review(s), final answer."""
    parts = ["BlackBerry: Analyzing with Little Thinking technique.\n\n"]
    # Four-berry analysis pass over the raw prompt.
    parts.append(little_thinking(prompt))
    # Draft answer generated directly from the user's prompt.
    parts.append(f"BlackBerry: Initial answer:\n{generate_response(prompt, max_length=100)}\n\n")
    transcript = "".join(parts)
    # The reviewer sees the whole transcript produced so far.
    transcript += reviewer_thinking(transcript)
    # Topics that look hard get a second review over the accumulated transcript.
    if re.search(r'\b(physics|science|coordinate|hard|difficult)\b', prompt, re.IGNORECASE):
        transcript += second_reviewer_thinking(transcript)
    # Final answer is regenerated from the original prompt with a larger budget.
    transcript += f"BlackBerry: Final answer:\n{generate_response(prompt, max_length=150)}"
    return transcript
# Build the Gradio UI: one text box in, BlackBerry's full transcript out.
iface = gr.Interface(
    fn=blackberry_response,
    inputs=gr.Textbox(lines=5, label="Enter your query"),
    outputs=gr.Textbox(label="BlackBerry's Response"),
    title="Blackberry-1 LLM",
    # BUG FIX: the description previously credited meta-llama/Llama-3.2-1B,
    # but the model actually loaded above is Qwen/Qwen2.5-0.5B.
    description="Powered by Qwen/Qwen2.5-0.5B with 'Little Thinking' technique",
)

# Launch the app (blocks until the server is stopped).
iface.launch()