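"""Gradio chat app for a Hugging Face Space: upload a text file, then ask the
Qwen2.5-72B-Instruct model questions about it via the serverless Inference API.
The file content is token-counted and truncated client-side to fit the request."""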
import gradio as gr
from huggingface_hub import InferenceClient
from transformers import AutoTokenizer
client = InferenceClient("Qwen/Qwen2.5-72B-Instruct")
# Tokenizer from a sibling Qwen2.5 checkpoint; the Qwen2.5 family shares a
# tokenizer, so token counts should closely match the 72B chat model.
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-Coder-32B-Instruct")
# Module-level state shared between the upload handler and the chat handler
uploaded_file_content = ""
def truncate_text_by_tokens(text, max_tokens):
    """Truncate text to at most max_tokens tokens, cutting at a token boundary."""
    tokens = tokenizer.encode(text)
    # If the text is already within the token limit, return it unchanged
    if len(tokens) <= max_tokens:
        return text
    # Otherwise, truncate the token list and decode it back to text
    truncated_tokens = tokens[:max_tokens]
    return tokenizer.decode(truncated_tokens, skip_special_tokens=True)
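# Illustrative usage (exact counts depend on the tokenizer's BPE merges):
#   truncate_text_by_tokens("word " * 10_000, max_tokens=512)
#   -> returns the decoded text of roughly the first 512 tokens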
def handle_file_upload(file_obj):
    global uploaded_file_content
    if file_obj is None:
        return "No file uploaded."
    try:
        # gr.File(type="binary") passes the raw bytes directly, not a file handle
        content = file_obj
        try:
            file_content = content.decode("utf-8")
        except UnicodeDecodeError:
            file_content = content.decode("latin-1")
        # Store the full content; it is truncated per-question in respond()
        uploaded_file_content = file_content
        # Short preview of the beginning of the file for the status box
        preview = truncate_text_by_tokens(file_content, max_tokens=100)[:200] + "..."
        total_tokens = len(tokenizer.encode(file_content))
        return f"""File uploaded successfully!
Total length: {total_tokens} tokens
Note: content beyond the "Max new tokens" setting is truncated before each question.
Preview of beginning:
{preview}"""
    except Exception as e:
        return f"Error uploading file: {str(e)}"
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    global uploaded_file_content
    # The same max_tokens slider bounds both the file excerpt and the reply length
    truncated_content = (
        truncate_text_by_tokens(uploaded_file_content, max_tokens)
        if uploaded_file_content
        else ""
    )
    # Prepend the (possibly truncated) file content to the user's question
    current_message = message
    if truncated_content:
        current_message = f"""Here is the content of the uploaded text file (truncated to fit within limits):
{truncated_content}
User's question about the above content: {message}"""
    # Build the complete message history, starting with the system prompt
    messages = [{"role": "system", "content": system_message}]
    # Add recent conversation history (only the last 3 exchanges, to save tokens)
    for user_msg, assistant_msg in history[-3:]:
        messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    # Add the current message
    messages.append({"role": "user", "content": current_message})
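    # The resulting payload has the usual OpenAI-style chat shape, e.g.:
    #   [{"role": "system", "content": "..."},
    #    {"role": "user", "content": "..."},
    #    {"role": "assistant", "content": "..."},
    #    {"role": "user", "content": "<file excerpt + question>"}]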
    # Stream the response token by token
    response = ""
    try:
        # "chunk" rather than "message" to avoid shadowing the parameter above
        for chunk in client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            token = chunk.choices[0].delta.content
            if token:
                response += token
                yield response
    except Exception as e:
        yield f"Error generating response: {str(e)}\nTry asking about a specific part of the text instead of the whole document."
    # Clear the uploaded file content once the response is finished
    uploaded_file_content = ""
# Create the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("""## Chat Interface with File Upload
Note: Due to API limitations, very large texts will be truncated. For best results with long texts:
- Ask about specific sections or chapters
- Break up your questions into smaller parts
- Be specific about what you want to know
""")
    with gr.Row():
        with gr.Column():
            file_upload = gr.File(
                label="Upload a text file",
                file_types=[".txt"],
                type="binary",
            )
            upload_button = gr.Button("Upload File")
            upload_output = gr.Textbox(
                label="Upload Status",
                interactive=False,
                lines=6,
            )
    # Wire the upload button to the file handler
    upload_button.click(
        fn=handle_file_upload,
        inputs=[file_upload],
        outputs=[upload_output],
    )
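    # Note: gr.File also exposes an .upload() event that could trigger
    # handle_file_upload automatically; the explicit button is assumed here to
    # keep the upload step deliberate and give visible status feedback.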
    chatbot = gr.ChatInterface(
        respond,
        additional_inputs=[
            gr.Textbox(
                value="You are a helpful AI assistant. When analyzing text files, provide detailed and insightful analysis of their content. If the text is truncated, acknowledge this in your response.",
                label="System message",
            ),
            gr.Slider(
                minimum=1,
                maximum=32000,
                value=30000,
                step=1,
                label="Max new tokens",
            ),
            gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.7,
                step=0.1,
                label="Temperature",
            ),
            gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.95,
                step=0.05,
                label="Top-p (nucleus sampling)",
            ),
        ],
    )
if __name__ == "__main__":
    demo.launch()
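# If running under an older Gradio 3.x, streamed (generator) responses require
# demo.queue() before launch; Gradio 4.x enables the queue by default.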