import gradio as gr
from huggingface_hub import InferenceClient
from transformers import AutoTokenizer

client = InferenceClient("Qwen/Qwen2.5-72B-Instruct")
# Qwen2.5 models share a tokenizer family, so this tokenizer gives accurate
# token counts for the 72B chat model as well.
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-Coder-32B-Instruct")

# Global variable holding the most recently uploaded file's text
uploaded_file_content = ""


def truncate_text_by_tokens(text, max_tokens):
    """Truncate text to at most max_tokens tokens."""
    # Tokenize the input text
    tokens = tokenizer.encode(text)
    # If the text is already within the token limit, return it as is
    if len(tokens) <= max_tokens:
        return text
    # Otherwise, truncate the token list and decode it back to text
    truncated_tokens = tokens[:max_tokens]
    return tokenizer.decode(truncated_tokens, skip_special_tokens=True)


def handle_file_upload(file_obj):
    global uploaded_file_content
    if file_obj is None:
        return "No file uploaded."
    try:
        # With type="binary", Gradio passes the file content as raw bytes;
        # fall back to .read() in case a file-like object is received instead.
        content = file_obj if isinstance(file_obj, bytes) else file_obj.read()
        try:
            file_content = content.decode("utf-8")
        except UnicodeDecodeError:
            file_content = content.decode("latin-1")

        # Store the full content but truncate it for the preview
        uploaded_file_content = file_content
        preview = truncate_text_by_tokens(file_content, max_tokens=100)[:200] + "..."
        total_tokens = len(tokenizer.encode(file_content))

        return f"""File uploaded successfully!
Total length: {total_tokens} tokens
Note: long files are truncated at question time to fit within API limits.
Preview of beginning: {preview}"""
    except Exception as e:
        return f"Error uploading file: {str(e)}"


def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    global uploaded_file_content

    # Truncate the file content to the token budget. Note that max_tokens is
    # shared between the inlined file excerpt and the generated response.
    truncated_content = (
        truncate_text_by_tokens(uploaded_file_content, max_tokens)
        if uploaded_file_content
        else ""
    )

    # Prepend the (possibly truncated) file content to the user's question
    current_message = message
    if truncated_content:
        current_message = f"""Here is the content of the uploaded text file (truncated to fit within limits):

{truncated_content}

User's question about the above content: {message}"""

    # Build the complete message list, starting with the system prompt
    messages = [{"role": "system", "content": system_message}]

    # Add conversation history (only the last 3 exchanges)
    for user_msg, assistant_msg in history[-3:]:
        messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})

    # Add the current message
    messages.append({"role": "user", "content": current_message})

    # Generate and stream the response
    response = ""
    try:
        for chunk in client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            token = chunk.choices[0].delta.content
            if token:
                response += token
                yield response
    except Exception as e:
        yield (
            f"Error generating response: {str(e)}\n"
            "Try asking about a specific part of the text instead of the whole document."
        )

    # Clear the stored file content once the response is complete
    uploaded_file_content = ""


# Create the Gradio interface
demo = gr.Blocks()

with demo:
    gr.Markdown("""## Chat Interface with File Upload

Note: Due to API limitations, very large texts will be truncated. For best results with long texts:
- Ask about specific sections or chapters
- Break up your questions into smaller parts
- Be specific about what you want to know
""")

    with gr.Row():
        with gr.Column():
            file_upload = gr.File(
                label="Upload a text file",
                file_types=[".txt"],
                type="binary",
            )
            upload_button = gr.Button("Upload File")
            upload_output = gr.Textbox(
                label="Upload Status",
                interactive=False,
                lines=6,
            )

    # Connect file upload handling
    upload_button.click(
        fn=handle_file_upload,
        inputs=[file_upload],
        outputs=[upload_output],
    )

    chatbot = gr.ChatInterface(
        respond,
        additional_inputs=[
            gr.Textbox(
                value=(
                    "You are a helpful AI assistant. When analyzing text files, "
                    "provide detailed and insightful analysis of their content. "
                    "If the text is truncated, acknowledge this in your response."
                ),
                label="System message",
            ),
            gr.Slider(minimum=1, maximum=32000, value=30000, step=1, label="Max new tokens"),
            gr.Slider(minimum=0.1, maximum=1.0, value=0.7, step=0.1, label="Temperature"),
            gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
        ],
    )

if __name__ == "__main__":
    demo.launch()
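
# Optional sanity check for truncate_text_by_tokens (a sketch, assuming the
# definitions above are in scope; the 64-token budget is an arbitrary example
# value, not anything the app itself uses):
#
#   sample = "The quick brown fox jumps over the lazy dog. " * 200
#   clipped = truncate_text_by_tokens(sample, max_tokens=64)
#   print(len(tokenizer.encode(sample)), "tokens ->",
#         len(tokenizer.encode(clipped)), "tokens")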