# Qwen2.5-72B Space - app.py
import gradio as gr
from huggingface_hub import InferenceClient
client = InferenceClient("Qwen/Qwen2.5-Coder-32B-Instruct")
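# Passing a model id to InferenceClient routes requests to Hugging Face's
# hosted inference service for that model.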
# Global variables
uploaded_file_content = ""
MAX_CHARS = 50000 # Maximum characters to send to API
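# Note: 50,000 characters is a rough, conservative budget (characters, not
# tokens) so the file excerpt plus chat history fits the model's context;
# the exact limit is an assumption, not a documented API constant.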
def truncate_text(text, max_length=MAX_CHARS):
    """Truncate text to max_length while trying to preserve complete sentences."""
    if len(text) <= max_length:
        return text
    # Try to end at the last period before max_length
    last_period = text[:max_length].rfind('.')
    if last_period != -1:
        return text[:last_period + 1]
    # If no period is found, hard-truncate at max_length and mark the cut
    return text[:max_length] + "..."
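# Example (hypothetical input): truncate_text("One. Two. Three.", max_length=10)
# returns "One. Two." because the cut lands on the last period before the limit.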
def handle_file_upload(file_obj):
    global uploaded_file_content
    if file_obj is None:
        return "No file uploaded."
    try:
        # With type="binary", Gradio delivers the raw file bytes
        content = file_obj
        try:
            file_content = content.decode('utf-8')
        except UnicodeDecodeError:
            # latin-1 can decode any byte sequence, so it is a safe fallback
            file_content = content.decode('latin-1')
        # Store the full content; truncate only for the preview and limit report
        uploaded_file_content = file_content
        truncated = truncate_text(file_content, MAX_CHARS)
        preview = truncated[:200] + "..."
        total_chars = len(file_content)
        usable_chars = len(truncated)
        return f"""File uploaded successfully!
Total length: {total_chars} characters
Usable length for AI: {usable_chars} characters (due to API limits)
Preview of beginning:
{preview}"""
    except Exception as e:
        return f"Error uploading file: {str(e)}"
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p
):
    global uploaded_file_content
    # Truncate the stored file content if needed
    truncated_content = truncate_text(uploaded_file_content) if uploaded_file_content else ""
    # Prepend the (truncated) file content to the user's question
    current_message = message
    if truncated_content:
        current_message = f"""Here is the content of the uploaded text file (truncated to fit within limits):
{truncated_content}
User's question about the above content: {message}"""
    # Build the complete message history
    messages = [
        {"role": "system", "content": system_message}
    ]
    # Add recent conversation history (only the last 3 exchanges, to stay within limits)
    for user_msg, assistant_msg in history[-3:]:
        messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    # Add the current message
    messages.append({"role": "user", "content": current_message})
    # Generate and stream the response
    response = ""
    try:
        # "chunk" avoids shadowing the "message" parameter above
        for chunk in client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            token = chunk.choices[0].delta.content
            if token:
                response += token
                yield response
    except Exception as e:
        yield f"Error generating response: {str(e)}\nTry asking about a specific part of the text instead of the whole document."
    # Clear the stored file content so it is only attached to one response
    uploaded_file_content = ""
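# For reference, the payload handed to chat_completion has this shape (illustrative):
# [
#     {"role": "system", "content": "You are a helpful AI assistant. ..."},
#     {"role": "user", "content": "...previous question..."},
#     {"role": "assistant", "content": "...previous answer..."},
#     {"role": "user", "content": "...file excerpt + current question..."},
# ]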
# Create the Gradio interface
demo = gr.Blocks()
with demo:
    gr.Markdown("""## Chat Interface with File Upload
Note: Due to API limitations, very large texts will be truncated. For best results with long texts:
- Ask about specific sections or chapters
- Break up your questions into smaller parts
- Be specific about what you want to know
""")
    with gr.Row():
        with gr.Column():
            file_upload = gr.File(
                label="Upload a text file",
                file_types=[".txt"],
                type="binary"
            )
            upload_button = gr.Button("Upload File")
            upload_output = gr.Textbox(
                label="Upload Status",
                interactive=False,
                lines=6
            )
    # Wire the upload button to the file handler
    upload_button.click(
        fn=handle_file_upload,
        inputs=[file_upload],
        outputs=[upload_output]
    )
    chatbot = gr.ChatInterface(
        respond,
        additional_inputs=[
            gr.Textbox(
                value="You are a helpful AI assistant. When analyzing text files, provide detailed and insightful analysis of their content. If the text is truncated, acknowledge this in your response.",
                label="System message"
            ),
            gr.Slider(
                minimum=1,
                maximum=32000,
                value=2048,
                step=1,
                label="Max new tokens"
            ),
            gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.7,
                step=0.1,
                label="Temperature"
            ),
            gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.95,
                step=0.05,
                label="Top-p (nucleus sampling)"
            ),
        ],
    )
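    # The additional_inputs above are passed positionally to respond after
    # (message, history): system_message, max_tokens, temperature, top_p.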
if __name__ == "__main__":
    demo.launch()