# Hugging Face Spaces app (Space status when this page was captured: Sleeping)
import gradio as gr | |
from transformers import AutoModelForCausalLM, CodeGenTokenizerFast as Tokenizer | |
from PIL import Image | |
import gc | |
from pympler import asizeof | |
def clear_model(model):
    """Drop this function's reference to ``model`` and force a garbage collection.

    ``del model`` only unbinds the local parameter name. The object can be
    reclaimed by ``gc.collect()`` only if no other reference to it exists, so
    callers must also delete or rebind their own variable (and any cache
    entry) that still points at the model.

    Args:
        model: The object to release.

    Returns:
        None.
    """
    del model
    gc.collect()
# Loaded (model, tokenizer) pairs keyed by model id, so the slow load happens
# once per process instead of on every request.
_LOADED_MODELS = {}


def _get_model_and_tokenizer(model_id):
    """Return the cached (model, tokenizer) pair for ``model_id``, loading it on first use."""
    if model_id not in _LOADED_MODELS:
        model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True)
        tokenizer = Tokenizer.from_pretrained(model_id)
        _LOADED_MODELS[model_id] = (model, tokenizer)
    return _LOADED_MODELS[model_id]


def process_image_and_question(image, question):
    """Answer ``question`` about ``image`` and append a memory-usage report.

    Args:
        image: The uploaded image (the interface passes a PIL image), or
            ``None`` when nothing was uploaded.
        question: The user's question. An empty, whitespace-only or missing
            question falls back to a generic "describe" prompt.

    Returns:
        The model's answer followed by a line reporting the in-RAM size of the
        model and tokenizer as measured by ``asizeof``, or a short prompt to
        upload an image when ``image`` is ``None``.
    """
    # Nothing to encode: return a readable message instead of failing inside the model.
    if image is None:
        return "Please upload an image before asking a question."

    if not question or not question.strip():
        question = "describe this image?"

    model, tokenizer = _get_model_and_tokenizer("vikhyatk/moondream1")

    # encode_image / answer_question are not part of the standard transformers
    # API; they are expected to come from the model's own code, which is why
    # the model is loaded with trust_remote_code=True.
    enc_image = model.encode_image(image)
    answer = model.answer_question(enc_image, question, tokenizer)

    model_size = asizeof.asizeof(model)
    tokenizer_size = asizeof.asizeof(tokenizer)
    return (
        f"{answer}"
        f"\nModel size in RAM: {model_size} bytes, Tokenizer size in RAM: {tokenizer_size} bytes"
    )
# Define the Gradio interface: a PIL image plus a free-text question in, plain text out.
iface = gr.Interface(
    fn=process_image_and_question,
    inputs=[
        gr.Image(type="pil"),
        gr.Textbox(lines=2, placeholder="Ask a question about the image..."),
    ],
    outputs="text",
    title="Image Question Answering",
    description="Upload an image and ask a question about it. ( 2 - 3 min response time expected )",
)

# Responses are expected to take minutes, so route requests through Gradio's
# queue rather than relying on a single long-lived HTTP request.
iface.queue()

# Launch the interface
iface.launch()