import gradio as gr
from huggingface_hub import InferenceClient

client = InferenceClient("Qwen/Qwen2.5-Coder-32B-Instruct")

# Global variables
uploaded_file_content = ""  # full text of the most recently uploaded file
MAX_CHARS = 50000  # Maximum characters to send to API


def truncate_text(text, max_length=MAX_CHARS):
    """Truncate text to max_length while trying to preserve complete sentences.

    Returns the text unchanged when it already fits. Otherwise cuts at the
    last period before max_length; if no period exists, hard-truncates and
    appends an ellipsis while still staying within max_length (the original
    appended "..." past the limit, overshooting by 3 characters).
    """
    if len(text) <= max_length:
        return text

    # Try to find the last period before max_length
    last_period = text[:max_length].rfind('.')
    if last_period != -1:
        return text[:last_period + 1]

    # No sentence boundary found: keep the ellipsis inside the limit
    return text[:max_length - 3] + "..."


def handle_file_upload(file_obj):
    """Store an uploaded file's text in the global buffer and return a status message.

    file_obj is raw bytes (gr.File is configured with type="binary").
    Decodes as UTF-8 with a latin-1 fallback, stores the FULL text globally,
    and reports how much of it is usable after truncation plus a short preview.
    """
    global uploaded_file_content
    if file_obj is None:
        return "No file uploaded."
    try:
        try:
            file_content = file_obj.decode('utf-8')
        except UnicodeDecodeError:
            # latin-1 never fails to decode; lenient fallback for non-UTF-8 files
            file_content = file_obj.decode('latin-1')

        # Store full content but truncate for preview
        uploaded_file_content = file_content
        truncated = truncate_text(file_content, MAX_CHARS)
        preview = truncated[:200] + "..."
        total_chars = len(file_content)
        usable_chars = len(truncated)

        return f"""File uploaded successfully!
Total length: {total_chars} characters
Usable length for AI: {usable_chars} characters (due to API limits)
Preview of beginning:
{preview}"""
    except Exception as e:
        return f"Error uploading file: {str(e)}"


def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a chat completion, injecting the uploaded file into the prompt.

    Yields the cumulative response text so Gradio renders it incrementally.
    Only the last 3 history exchanges are sent, and the file content is
    truncated, to stay within API limits. The stored file content is cleared
    once the response finishes.
    """
    global uploaded_file_content

    # Truncate file content if needed
    truncated_content = truncate_text(uploaded_file_content) if uploaded_file_content else ""

    # Format the current message to include truncated file content
    current_message = message
    if truncated_content:
        current_message = f"""Here is the content of the uploaded text file (truncated to fit within limits):

{truncated_content}

User's question about the above content: {message}"""

    # Build the complete message history
    messages = [
        {"role": "system", "content": system_message}
    ]

    # Add conversation history (limited) — only keep last 3 exchanges
    for user_msg, assistant_msg in history[-3:]:
        messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})

    # Add the current message
    messages.append({"role": "user", "content": current_message})

    # Generate and stream the response.
    # NOTE: the loop variable is `chunk`, not `message` — the original
    # shadowed the `message` parameter with the streamed objects.
    response = ""
    try:
        for chunk in client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            token = chunk.choices[0].delta.content
            if token:
                response += token
                yield response
    except Exception as e:
        yield f"Error generating response: {str(e)}\nTry asking about a specific part of the text instead of the whole document."

    # Clear the uploaded file content after responding
    uploaded_file_content = ""


# Create the Gradio interface
demo = gr.Blocks()

with demo:
    gr.Markdown("""## Chat Interface with File Upload
Note: Due to API limitations, very large texts will be truncated.
For best results with long texts:
- Ask about specific sections or chapters
- Break up your questions into smaller parts
- Be specific about what you want to know
""")

    with gr.Row():
        with gr.Column():
            file_upload = gr.File(
                label="Upload a text file",
                file_types=[".txt"],
                type="binary"
            )
            upload_button = gr.Button("Upload File")
            upload_output = gr.Textbox(
                label="Upload Status",
                interactive=False,
                lines=6
            )

    # Connect file upload handling
    upload_button.click(
        fn=handle_file_upload,
        inputs=[file_upload],
        outputs=[upload_output]
    )

    chatbot = gr.ChatInterface(
        respond,
        additional_inputs=[
            gr.Textbox(
                value="You are a helpful AI assistant. When analyzing text files, provide detailed and insightful analysis of their content. If the text is truncated, acknowledge this in your response.",
                label="System message"
            ),
            gr.Slider(
                minimum=1,
                maximum=32000,
                value=2048,
                step=1,
                label="Max new tokens"
            ),
            gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.7,
                step=0.1,
                label="Temperature"
            ),
            gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.95,
                step=0.05,
                label="Top-p (nucleus sampling)"
            ),
        ],
    )

if __name__ == "__main__":
    demo.launch()