import gradio as gr
from huggingface_hub import InferenceClient
from transformers import AutoTokenizer
# Use the same model for generation and token counting so truncation matches the API
client = InferenceClient("Qwen/Qwen2.5-72B-Instruct")
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-72B-Instruct")
# Global state: text of the most recently uploaded file
uploaded_file_content = ""
def truncate_text_by_tokens(text, max_tokens):
    """Truncate text to at most max_tokens tokens, decoding the kept tokens back to text."""
    tokens = tokenizer.encode(text)
    # If the text already fits within the token limit, return it unchanged
    if len(tokens) <= max_tokens:
        return text
    # Otherwise drop the trailing tokens and decode the remainder back to a string
    truncated_tokens = tokens[:max_tokens]
    return tokenizer.decode(truncated_tokens, skip_special_tokens=True)
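
# Illustrative usage (a sketch; the exact cut point depends on the Qwen tokenizer,
# so the outputs shown here are assumptions, not measured results):
#   truncate_text_by_tokens("a short line", max_tokens=50)   # -> returned unchanged
#   truncate_text_by_tokens(long_document, max_tokens=100)   # -> ~first 100 tokens, decoded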
def handle_file_upload(file_obj):
    global uploaded_file_content
    if file_obj is None:
        return "No file uploaded."
    try:
        # With gr.File(type="binary"), Gradio passes the file's raw bytes directly
        content = file_obj
        try:
            file_content = content.decode('utf-8')
        except UnicodeDecodeError:
            file_content = content.decode('latin-1')
        # Store the full content; show only a short preview in the status box
        uploaded_file_content = file_content
        preview = truncate_text_by_tokens(file_content, max_tokens=100)[:200] + "..."
        total_tokens = len(tokenizer.encode(file_content))
        return f"""File uploaded successfully!
Total length: {total_tokens} tokens
Note: the text may be truncated at question time to fit within API limits.
Preview of beginning:
{preview}"""
    except Exception as e:
        return f"Error uploading file: {str(e)}"
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p
):
    """Stream a reply, prepending the uploaded file's (truncated) content to the question."""
    global uploaded_file_content
    # Reuse the generation budget as a cap on how much file content is sent to the API
    truncated_content = truncate_text_by_tokens(uploaded_file_content, max_tokens) if uploaded_file_content else ""
# Format the current message to include truncated file content
current_message = message
if truncated_content:
current_message = f"""Here is the content of the uploaded text file (truncated to fit within limits):
{truncated_content}
User's question about the above content: {message}"""
# Build the complete message history
messages = [
{"role": "system", "content": system_message}
]
# Add conversation history (limited)
for user_msg, assistant_msg in history[-3:]: # Only keep last 3 exchanges
messages.append({"role": "user", "content": user_msg})
if assistant_msg:
messages.append({"role": "assistant", "content": assistant_msg})
# Add the current message
messages.append({"role": "user", "content": current_message})
    # Generate and stream the response
    response = ""
    try:
        # Each streamed chunk carries a delta with the newly generated text
        for chunk in client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            token = chunk.choices[0].delta.content
            if token:
                response += token
                yield response
    except Exception as e:
        yield f"Error generating response: {str(e)}\nTry asking about a specific part of the text instead of the whole document."
    # Clear the stored file content so each upload is only used for one answer
    uploaded_file_content = ""
# Create the Gradio interface
with gr.Blocks() as demo:
gr.Markdown("""## Chat Interface with File Upload
Note: Due to API limitations, very large texts will be truncated. For best results with long texts:
- Ask about specific sections or chapters
- Break up your questions into smaller parts
- Be specific about what you want to know
""")
with gr.Row():
with gr.Column():
file_upload = gr.File(
label="Upload a text file",
file_types=[".txt"],
type="binary"
)
upload_button = gr.Button("Upload File")
upload_output = gr.Textbox(
label="Upload Status",
interactive=False,
lines=6
)
# Connect file upload handling
upload_button.click(
fn=handle_file_upload,
inputs=[file_upload],
outputs=[upload_output]
)
chatbot = gr.ChatInterface(
respond,
additional_inputs=[
gr.Textbox(
value="You are a helpful AI assistant. When analyzing text files, provide detailed and insightful analysis of their content. If the text is truncated, acknowledge this in your response.",
label="System message"
),
gr.Slider(
minimum=1,
maximum=32000,
value=30000,
step=1,
label="Max new tokens"
),
gr.Slider(
minimum=0.1,
maximum=1.0,
value=0.7,
step=0.1,
label="Temperature"
),
gr.Slider(
minimum=0.1,
maximum=1.0,
value=0.95,
step=0.05,
label="Top-p (nucleus sampling)"
),
],
)
if __name__ == "__main__":
demo.launch()
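
# To run outside of a Hugging Face Space (a sketch, assuming the serverless
# Inference API serves this model and a token is available, e.g. via the
# HF_TOKEN environment variable):
#   pip install gradio transformers huggingface_hub
#   python app.py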