|
import streamlit as st |
|
from transformers import AutoProcessor, AutoModelForCausalLM |
|
from PIL import Image |
|
import torch |
|
|
|
|
|
model_id = "OpenFace-CQUPT/Human_LLaVA" |
|
processor = AutoProcessor.from_pretrained(model_id) |
|
model = AutoModelForCausalLM.from_pretrained(model_id).to("cuda" if torch.cuda.is_available() else "cpu") |
|
|
|
|
|
st.title("Visual Question Answering App") |
|
st.write("Upload an image and ask a question about it!") |
|
|
|
|
|
uploaded_image = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"]) |
|
question = st.text_input("Ask a question about the image:") |
|
|
|
|
|
if uploaded_image is not None and question: |
|
image = Image.open(uploaded_image) |
|
|
|
|
|
st.image(image, caption="Uploaded Image", use_column_width=True) |
|
st.write("Question:", question) |
|
|
|
|
|
with st.spinner("Generating answer..."): |
|
inputs = processor(images=image, text=question, return_tensors="pt").to("cuda" if torch.cuda.is_available() else "cpu") |
|
with torch.no_grad(): |
|
output = model.generate(**inputs) |
|
answer = processor.decode(output[0], skip_special_tokens=True) |
|
|
|
|
|
st.write("Answer:", answer) |
|
|