"""Streamlit demo: text generation with Qwen-7B loaded in 4-bit quantization."""

import bitsandbytes as bnb  # noqa: F401  (fails fast if the 4-bit backend is missing)
import streamlit as st
import torch  # noqa: F401
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
)

MODEL_NAME = "Qwen/Qwen-7B"
# Upper bound on *newly generated* tokens (the prompt is not counted).
MAX_NEW_TOKENS = 100


@st.cache_resource
def load_model():
    """Load the Qwen-7B tokenizer and 4-bit quantized model.

    Decorated with ``st.cache_resource`` so the load is not repeated on
    every Streamlit rerun.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    # SECURITY: trust_remote_code=True executes Python code shipped in the
    # model repository. Only use it with repositories you trust.
    tokenizer = AutoTokenizer.from_pretrained(
        MODEL_NAME,
        trust_remote_code=True,
    )
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        # Passing load_in_4bit directly to from_pretrained is deprecated;
        # the quantization settings belong in a BitsAndBytesConfig.
        quantization_config=BitsAndBytesConfig(load_in_4bit=True),
        device_map="auto",
        trust_remote_code=True,  # Allows custom code execution
    )
    return tokenizer, model


tokenizer, model = load_model()

# Streamlit app UI
st.title("Qwen-7B Text Generation with 4-bit Quantization")

# Text input
user_input = st.text_area("Enter your text:")

# Generate text on button click
if st.button("Generate"):
    if not user_input.strip():
        # An empty prompt tokenizes to a zero-length sequence, which
        # generate() cannot continue from.
        st.warning("Please enter some text before generating.")
    else:
        # device_map="auto" may place the model on an accelerator, so the
        # input tensors must be moved to the same device as the model.
        inputs = tokenizer(user_input, return_tensors="pt").to(model.device)
        # max_new_tokens bounds only the continuation; max_length would
        # also count the prompt and leave long prompts with no output.
        outputs = model.generate(**inputs, max_new_tokens=MAX_NEW_TOKENS)
        generated_text = tokenizer.decode(
            outputs[0],
            skip_special_tokens=True,
        )
        st.write("Generated Text:")
        st.write(generated_text)