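"""Gradio chat app for a Hugging Face Space: upload a text file, then ask the
Qwen2.5-72B-Instruct model questions about it via the serverless Inference API.
The file content is token-counted and truncated client-side to fit the request."""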
import gradio as gr
from huggingface_hub import InferenceClient
from transformers import AutoTokenizer
client = InferenceClient("Qwen/Qwen2.5-72B-Instruct")
# Tokenizer from a sibling Qwen2.5 checkpoint; the Qwen2.5 family shares a
# tokenizer, so token counts should closely match the 72B chat model.
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-Coder-32B-Instruct")
# Module-level state shared between the upload handler and the chat handler
uploaded_file_content = ""
def truncate_text_by_tokens(text, max_tokens):
    """Truncate text to at most max_tokens tokens, cutting at a token boundary."""
    tokens = tokenizer.encode(text)
    # If the text is already within the token limit, return it unchanged
    if len(tokens) <= max_tokens:
        return text
    # Otherwise, truncate the token list and decode it back to text
    truncated_tokens = tokens[:max_tokens]
    return tokenizer.decode(truncated_tokens, skip_special_tokens=True)
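# Illustrative usage (exact counts depend on the tokenizer's BPE merges):
#   truncate_text_by_tokens("word " * 10_000, max_tokens=512)
#   -> returns the decoded text of roughly the first 512 tokens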
def handle_file_upload(file_obj):
    global uploaded_file_content
    if file_obj is None:
        return "No file uploaded."
    try:
        # gr.File(type="binary") passes the raw bytes directly, not a file handle
        content = file_obj
        try:
            file_content = content.decode("utf-8")
        except UnicodeDecodeError:
            file_content = content.decode("latin-1")
        # Store the full content; it is truncated per-question in respond()
        uploaded_file_content = file_content
        # Short preview of the beginning of the file for the status box
        preview = truncate_text_by_tokens(file_content, max_tokens=100)[:200] + "..."
        total_tokens = len(tokenizer.encode(file_content))
        return f"""File uploaded successfully!
Total length: {total_tokens} tokens
Note: content beyond the "Max new tokens" setting is truncated before each question.
Preview of beginning:
{preview}"""
    except Exception as e:
        return f"Error uploading file: {str(e)}"
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    global uploaded_file_content
    # The same max_tokens slider bounds both the file excerpt and the reply length
    truncated_content = (
        truncate_text_by_tokens(uploaded_file_content, max_tokens)
        if uploaded_file_content
        else ""
    )
    # Prepend the (possibly truncated) file content to the user's question
    current_message = message
    if truncated_content:
        current_message = f"""Here is the content of the uploaded text file (truncated to fit within limits):
{truncated_content}
User's question about the above content: {message}"""
    # Build the complete message history, starting with the system prompt
    messages = [{"role": "system", "content": system_message}]
    # Add recent conversation history (only the last 3 exchanges, to save tokens)
    for user_msg, assistant_msg in history[-3:]:
        messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    # Add the current message
    messages.append({"role": "user", "content": current_message})
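    # The resulting payload has the usual OpenAI-style chat shape, e.g.:
    #   [{"role": "system", "content": "..."},
    #    {"role": "user", "content": "..."},
    #    {"role": "assistant", "content": "..."},
    #    {"role": "user", "content": "<file excerpt + question>"}]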
    # Stream the response token by token
    response = ""
    try:
        # "chunk" rather than "message" to avoid shadowing the parameter above
        for chunk in client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            token = chunk.choices[0].delta.content
            if token:
                response += token
                yield response
    except Exception as e:
        yield f"Error generating response: {str(e)}\nTry asking about a specific part of the text instead of the whole document."
    # Clear the uploaded file content once the response is finished
    uploaded_file_content = ""
# Create the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("""## Chat Interface with File Upload
Note: Due to API limitations, very large texts will be truncated. For best results with long texts:
- Ask about specific sections or chapters
- Break up your questions into smaller parts
- Be specific about what you want to know
""")
    with gr.Row():
        with gr.Column():
            file_upload = gr.File(
                label="Upload a text file",
                file_types=[".txt"],
                type="binary",
            )
            upload_button = gr.Button("Upload File")
            upload_output = gr.Textbox(
                label="Upload Status",
                interactive=False,
                lines=6,
            )
    # Wire the upload button to the file handler
    upload_button.click(
        fn=handle_file_upload,
        inputs=[file_upload],
        outputs=[upload_output],
    )
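    # Note: gr.File also exposes an .upload() event that could trigger
    # handle_file_upload automatically; the explicit button is assumed here to
    # keep the upload step deliberate and give visible status feedback.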
    chatbot = gr.ChatInterface(
        respond,
        additional_inputs=[
            gr.Textbox(
                value="You are a helpful AI assistant. When analyzing text files, provide detailed and insightful analysis of their content. If the text is truncated, acknowledge this in your response.",
                label="System message",
            ),
            gr.Slider(
                minimum=1,
                maximum=32000,
                value=30000,
                step=1,
                label="Max new tokens",
            ),
            gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.7,
                step=0.1,
                label="Temperature",
            ),
            gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.95,
                step=0.05,
                label="Top-p (nucleus sampling)",
            ),
        ],
    )
if __name__ == "__main__":
    demo.launch()
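# If running under an older Gradio 3.x, streamed (generator) responses require
# demo.queue() before launch; Gradio 4.x enables the queue by default.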