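"""Gradio chat app with text-file upload.

Streams replies from Qwen/Qwen2.5-72B-Instruct via the Hugging Face
Inference API, and uses a Qwen tokenizer to count tokens and truncate
long uploads before they are injected into the prompt.
"""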
import gradio as gr
from huggingface_hub import InferenceClient
from transformers import AutoTokenizer

client = InferenceClient("Qwen/Qwen2.5-72B-Instruct")
# Tokenizer from the same Qwen2.5 family; used only for counting and
# truncating tokens, not for generation.
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-Coder-32B-Instruct")

# Global variables
uploaded_file_content = ""
def truncate_text_by_tokens(text, max_tokens):
    """Truncate text to at most max_tokens tokens (a hard cut; sentences may be split)."""
    # Tokenize the input text
    tokens = tokenizer.encode(text)
    # If the text is already within the token limit, return it as is
    if len(tokens) <= max_tokens:
        return text
    # Otherwise, truncate the token list and decode it back to text
    truncated_tokens = tokens[:max_tokens]
    truncated_text = tokenizer.decode(truncated_tokens, skip_special_tokens=True)
    return truncated_text
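# Illustrative example (exact output depends on the tokenizer's splits):
#   truncate_text_by_tokens("The quick brown fox jumps over the lazy dog", 4)
#   -> roughly "The quick brown fox"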
def handle_file_upload(file_obj):
    global uploaded_file_content
    if file_obj is None:
        return "No file uploaded."
    try:
        # gr.File(type="binary") delivers raw bytes; accept a file-like
        # object too, in case the component type changes.
        content = file_obj if isinstance(file_obj, bytes) else file_obj.read()
        try:
            file_content = content.decode('utf-8')
        except UnicodeDecodeError:
            file_content = content.decode('latin-1')
        # Store the full content but truncate it for the preview
        uploaded_file_content = file_content
        preview = truncate_text_by_tokens(file_content, max_tokens=100)[:200] + "..."  # Preview truncated content
        total_tokens = len(tokenizer.encode(file_content))
        return f"""File uploaded successfully!
Total length: {total_tokens} tokens
(Long texts are truncated to fit API limits when you ask your question.)
Preview of beginning:
{preview}"""
    except Exception as e:
        return f"Error uploading file: {str(e)}"
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p
):
    global uploaded_file_content
    # Truncate the file content to the token budget; note this reuses the
    # "Max new tokens" slider value as the input budget as well.
    truncated_content = truncate_text_by_tokens(uploaded_file_content, max_tokens) if uploaded_file_content else ""
    # Fold the (possibly truncated) file content into the current message
    current_message = message
    if truncated_content:
        current_message = f"""Here is the content of the uploaded text file (truncated to fit within limits):
{truncated_content}
User's question about the above content: {message}"""
    # Build the complete message history
    messages = [
        {"role": "system", "content": system_message}
    ]
    # Add conversation history, keeping only the last 3 exchanges
    for user_msg, assistant_msg in history[-3:]:
        messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    # Add the current message
    messages.append({"role": "user", "content": current_message})
    # Generate and stream the response
    response = ""
    try:
        for chunk in client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            token = chunk.choices[0].delta.content
            if token:
                response += token
                yield response
    except Exception as e:
        yield f"Error generating response: {str(e)}\nTry asking about a specific part of the text instead of the whole document."

    # Clear the uploaded file content after responding
    uploaded_file_content = ""
# Create the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("""## Chat Interface with File Upload

Note: Due to API limitations, very large texts will be truncated. For best results with long texts:
- Ask about specific sections or chapters
- Break up your questions into smaller parts
- Be specific about what you want to know
""")

    with gr.Row():
        with gr.Column():
            file_upload = gr.File(
                label="Upload a text file",
                file_types=[".txt"],
                type="binary"
            )
            upload_button = gr.Button("Upload File")
            upload_output = gr.Textbox(
                label="Upload Status",
                interactive=False,
                lines=6
            )

    # Connect file upload handling
    upload_button.click(
        fn=handle_file_upload,
        inputs=[file_upload],
        outputs=[upload_output]
    )
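    # gr.ChatInterface wires the message box and chat history to the first two
    # parameters of respond; the additional_inputs components below are passed,
    # in order, as system_message, max_tokens, temperature, and top_p.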
    chatbot = gr.ChatInterface(
        respond,
        additional_inputs=[
            gr.Textbox(
                value="You are a helpful AI assistant. When analyzing text files, provide detailed and insightful analysis of their content. If the text is truncated, acknowledge this in your response.",
                label="System message"
            ),
            gr.Slider(
                minimum=1,
                maximum=32000,
                value=30000,
                step=1,
                label="Max new tokens"
            ),
            gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.7,
                step=0.1,
                label="Temperature"
            ),
            gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.95,
                step=0.05,
                label="Top-p (nucleus sampling)"
            ),
        ],
    )
if __name__ == "__main__":
    demo.launch()