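"""Gradio chat app with text-file upload.

Streams replies from Qwen/Qwen2.5-72B-Instruct via the Hugging Face
Inference API, and uses a Qwen tokenizer to count tokens and truncate
long uploads before they are injected into the prompt.
"""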
import gradio as gr
from huggingface_hub import InferenceClient
from transformers import AutoTokenizer

client = InferenceClient("Qwen/Qwen2.5-72B-Instruct")
# Tokenizer from the same Qwen2.5 family; used only for counting and
# truncating tokens, not for generation.
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-Coder-32B-Instruct")

# Global variables
uploaded_file_content = ""
def truncate_text_by_tokens(text, max_tokens):
    """Truncate text to at most max_tokens tokens (a hard cut; sentences may be split)."""
    # Tokenize the input text
    tokens = tokenizer.encode(text)
    # If the text is already within the token limit, return it as is
    if len(tokens) <= max_tokens:
        return text
    # Otherwise, truncate the token list and decode it back to text
    truncated_tokens = tokens[:max_tokens]
    truncated_text = tokenizer.decode(truncated_tokens, skip_special_tokens=True)
    return truncated_text
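# Illustrative example (exact output depends on the tokenizer's splits):
#   truncate_text_by_tokens("The quick brown fox jumps over the lazy dog", 4)
#   -> roughly "The quick brown fox"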
def handle_file_upload(file_obj):
    global uploaded_file_content
    if file_obj is None:
        return "No file uploaded."
    try:
        # gr.File(type="binary") delivers raw bytes; accept a file-like
        # object too, in case the component type changes.
        content = file_obj if isinstance(file_obj, bytes) else file_obj.read()
        try:
            file_content = content.decode('utf-8')
        except UnicodeDecodeError:
            file_content = content.decode('latin-1')
        # Store the full content but truncate it for the preview
        uploaded_file_content = file_content
        preview = truncate_text_by_tokens(file_content, max_tokens=100)[:200] + "..."  # Preview truncated content
        total_tokens = len(tokenizer.encode(file_content))
        return f"""File uploaded successfully!
Total length: {total_tokens} tokens
(Long texts are truncated to fit API limits when you ask your question.)
Preview of beginning:
{preview}"""
    except Exception as e:
        return f"Error uploading file: {str(e)}"
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p
):
    global uploaded_file_content
    # Truncate the file content to the token budget; note this reuses the
    # "Max new tokens" slider value as the input budget as well.
    truncated_content = truncate_text_by_tokens(uploaded_file_content, max_tokens) if uploaded_file_content else ""
    # Fold the (possibly truncated) file content into the current message
    current_message = message
    if truncated_content:
        current_message = f"""Here is the content of the uploaded text file (truncated to fit within limits):
{truncated_content}
User's question about the above content: {message}"""
    # Build the complete message history
    messages = [
        {"role": "system", "content": system_message}
    ]
    # Add conversation history, keeping only the last 3 exchanges
    for user_msg, assistant_msg in history[-3:]:
        messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    # Add the current message
    messages.append({"role": "user", "content": current_message})
    # Generate and stream the response
    response = ""
    try:
        for chunk in client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            token = chunk.choices[0].delta.content
            if token:
                response += token
                yield response
    except Exception as e:
        yield f"Error generating response: {str(e)}\nTry asking about a specific part of the text instead of the whole document."

    # Clear the uploaded file content after responding
    uploaded_file_content = ""
# Create the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("""## Chat Interface with File Upload

Note: Due to API limitations, very large texts will be truncated. For best results with long texts:
- Ask about specific sections or chapters
- Break up your questions into smaller parts
- Be specific about what you want to know
""")

    with gr.Row():
        with gr.Column():
            file_upload = gr.File(
                label="Upload a text file",
                file_types=[".txt"],
                type="binary"
            )
            upload_button = gr.Button("Upload File")
            upload_output = gr.Textbox(
                label="Upload Status",
                interactive=False,
                lines=6
            )

    # Connect file upload handling
    upload_button.click(
        fn=handle_file_upload,
        inputs=[file_upload],
        outputs=[upload_output]
    )
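    # gr.ChatInterface wires the message box and chat history to the first two
    # parameters of respond; the additional_inputs components below are passed,
    # in order, as system_message, max_tokens, temperature, and top_p.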
    chatbot = gr.ChatInterface(
        respond,
        additional_inputs=[
            gr.Textbox(
                value="You are a helpful AI assistant. When analyzing text files, provide detailed and insightful analysis of their content. If the text is truncated, acknowledge this in your response.",
                label="System message"
            ),
            gr.Slider(
                minimum=1,
                maximum=32000,
                value=30000,
                step=1,
                label="Max new tokens"
            ),
            gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.7,
                step=0.1,
                label="Temperature"
            ),
            gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.95,
                step=0.05,
                label="Top-p (nucleus sampling)"
            ),
        ],
    )
if __name__ == "__main__":
    demo.launch()