import os

import streamlit as st
import torch
from PIL import Image
from transformers import AutoModel, AutoTokenizer
from huggingface_hub import InferenceClient

# Load tokenizer and model
tokenizer = AutoTokenizer.from_pretrained('openbmb/MiniCPM-V-2', trust_remote_code=True)
model = AutoModel.from_pretrained('openbmb/MiniCPM-V-2', trust_remote_code=True)
model.eval()

# Set device for the model (half precision on GPU, full precision on CPU)
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = model.to(device=device, dtype=torch.float16 if device == 'cuda' else torch.float32)

# Retrieve the Hugging Face API key from the environment
api_key = os.getenv("HF_API_KEY")

# Initialize the Hugging Face Inference client with the API key
client = InferenceClient(api_key=api_key)

# Streamlit UI setup
st.title("Image Questioning and Content Generation App")
st.write(
    "Upload an image and ask a question. The model will respond with a description, "
    "and you can generate a song or story based on the response."
)

# Upload an image
uploaded_image = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])

if uploaded_image:
    image = Image.open(uploaded_image).convert('RGB')
    st.image(image, caption="Uploaded Image", use_column_width=True)

# Text input for the question
question = st.text_input("Ask a question about the image")

if question and uploaded_image:
    msgs = [{'role': 'user', 'content': question}]

    # Model's response to the image question
    with st.spinner("Processing..."):
        res, context, _ = model.chat(
            image=image,
            msgs=msgs,
            context=None,
            tokenizer=tokenizer,
            sampling=True,
            temperature=0.7
        )
    st.write("Model's response:", res)

    # Options for generating content based on the response
    option = st.selectbox(
        "Generate content based on the response",
        ["Choose...", "Write a Song", "Write a Story"]
    )

    if option != "Choose...":
        # Create a message based on the user's choice
        if option == "Write a Song":
            messages = [{"role": "user", "content": f"Write a song about the following: {res}"}]
        elif option == "Write a Story":
            messages = [{"role": "user", "content": f"Write a story about the following: {res}"}]

        # Stream the content generation
        st.write(f"Generating {option.lower()}...")
        stream = client.chat.completions.create(
            model="meta-llama/Llama-3.2-3B-Instruct",
            messages=messages,
            max_tokens=500,
            stream=True
        )

        generated_text = ""
        placeholder = st.empty()  # single element updated in place as chunks arrive
        for chunk in stream:
            delta = chunk.choices[0].delta.content
            if delta:  # some streamed chunks carry no text
                generated_text += delta
                placeholder.write(generated_text)
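
# A minimal way to run this app locally (assuming the script is saved as app.py and
# streamlit, torch, transformers, pillow, and huggingface_hub are installed):
#   HF_API_KEY=<your_huggingface_token> streamlit run app.py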