# Qwen2.5-72B Space - app.py
import gradio as gr
from huggingface_hub import InferenceClient
client = InferenceClient("Qwen/Qwen2.5-Coder-32B-Instruct")
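# Passing a model id to InferenceClient routes requests to Hugging Face's
# hosted inference service for that model.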
# Global variables
uploaded_file_content = ""
MAX_CHARS = 50000 # Maximum characters to send to API
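# Note: 50,000 characters is a rough, conservative budget (characters, not
# tokens) so the file excerpt plus chat history fits the model's context;
# the exact limit is an assumption, not a documented API constant.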
def truncate_text(text, max_length=MAX_CHARS):
    """Truncate text to max_length while trying to preserve complete sentences."""
    if len(text) <= max_length:
        return text
    # Try to end at the last period before max_length
    last_period = text[:max_length].rfind('.')
    if last_period != -1:
        return text[:last_period + 1]
    # If no period is found, hard-truncate at max_length and mark the cut
    return text[:max_length] + "..."
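# Example (hypothetical input): truncate_text("One. Two. Three.", max_length=10)
# returns "One. Two." because the cut lands on the last period before the limit.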
def handle_file_upload(file_obj):
    global uploaded_file_content
    if file_obj is None:
        return "No file uploaded."
    try:
        # With type="binary", Gradio delivers the raw file bytes
        content = file_obj
        try:
            file_content = content.decode('utf-8')
        except UnicodeDecodeError:
            # latin-1 can decode any byte sequence, so it is a safe fallback
            file_content = content.decode('latin-1')
        # Store the full content; truncate only for the preview and limit report
        uploaded_file_content = file_content
        truncated = truncate_text(file_content, MAX_CHARS)
        preview = truncated[:200] + "..."
        total_chars = len(file_content)
        usable_chars = len(truncated)
        return f"""File uploaded successfully!
Total length: {total_chars} characters
Usable length for AI: {usable_chars} characters (due to API limits)
Preview of beginning:
{preview}"""
    except Exception as e:
        return f"Error uploading file: {str(e)}"
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p
):
    global uploaded_file_content
    # Truncate the stored file content if needed
    truncated_content = truncate_text(uploaded_file_content) if uploaded_file_content else ""
    # Prepend the (truncated) file content to the user's question
    current_message = message
    if truncated_content:
        current_message = f"""Here is the content of the uploaded text file (truncated to fit within limits):
{truncated_content}
User's question about the above content: {message}"""
    # Build the complete message history
    messages = [
        {"role": "system", "content": system_message}
    ]
    # Add recent conversation history (only the last 3 exchanges, to stay within limits)
    for user_msg, assistant_msg in history[-3:]:
        messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    # Add the current message
    messages.append({"role": "user", "content": current_message})
    # Generate and stream the response
    response = ""
    try:
        # "chunk" avoids shadowing the "message" parameter above
        for chunk in client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            token = chunk.choices[0].delta.content
            if token:
                response += token
                yield response
    except Exception as e:
        yield f"Error generating response: {str(e)}\nTry asking about a specific part of the text instead of the whole document."
    # Clear the stored file content so it is only attached to one response
    uploaded_file_content = ""
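# For reference, the payload handed to chat_completion has this shape (illustrative):
# [
#     {"role": "system", "content": "You are a helpful AI assistant. ..."},
#     {"role": "user", "content": "...previous question..."},
#     {"role": "assistant", "content": "...previous answer..."},
#     {"role": "user", "content": "...file excerpt + current question..."},
# ]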
# Create the Gradio interface
demo = gr.Blocks()
with demo:
    gr.Markdown("""## Chat Interface with File Upload
Note: Due to API limitations, very large texts will be truncated. For best results with long texts:
- Ask about specific sections or chapters
- Break up your questions into smaller parts
- Be specific about what you want to know
""")
    with gr.Row():
        with gr.Column():
            file_upload = gr.File(
                label="Upload a text file",
                file_types=[".txt"],
                type="binary"
            )
            upload_button = gr.Button("Upload File")
            upload_output = gr.Textbox(
                label="Upload Status",
                interactive=False,
                lines=6
            )
    # Wire the upload button to the file handler
    upload_button.click(
        fn=handle_file_upload,
        inputs=[file_upload],
        outputs=[upload_output]
    )
    chatbot = gr.ChatInterface(
        respond,
        additional_inputs=[
            gr.Textbox(
                value="You are a helpful AI assistant. When analyzing text files, provide detailed and insightful analysis of their content. If the text is truncated, acknowledge this in your response.",
                label="System message"
            ),
            gr.Slider(
                minimum=1,
                maximum=32000,
                value=2048,
                step=1,
                label="Max new tokens"
            ),
            gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.7,
                step=0.1,
                label="Temperature"
            ),
            gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.95,
                step=0.05,
                label="Top-p (nucleus sampling)"
            ),
        ],
    )
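    # The additional_inputs above are passed positionally to respond after
    # (message, history): system_message, max_tokens, temperature, top_p.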
if __name__ == "__main__":
    demo.launch()