kwabs22
initial commit
f03a15c
raw
history blame
2.16 kB
import gradio as gr
from transformers import AutoModelForCausalLM, CodeGenTokenizerFast as Tokenizer
from PIL import Image
import gc
from pympler import asizeof
def clear_model(model) -> None:
    """Release this function's reference to ``model`` and force a GC pass.

    Only the local binding is dropped here. Any reference the caller still
    holds keeps the object alive, so the caller must drop its own references
    as well for the memory to actually be reclaimed.

    Args:
        model: The object whose local reference should be discarded.
    """
    # Remove the local name so this frame no longer keeps the object alive.
    del model
    # Run a full collection to reclaim anything that is now unreachable.
    gc.collect()
# Loaded (model, tokenizer) pairs keyed by model id. Kept at module level so
# the slow load happens once per process instead of once per request.
_LOADED_MODELS = {}


def _load_model_and_tokenizer(model_id):
    """Return the (model, tokenizer) pair for ``model_id``, loading on first use."""
    try:
        return _LOADED_MODELS[model_id]
    except KeyError:
        model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True)
        tokenizer = Tokenizer.from_pretrained(model_id)
        _LOADED_MODELS[model_id] = (model, tokenizer)
        return model, tokenizer


def process_image_and_question(image, question):
    """Answer ``question`` about ``image`` and append object-size figures.

    Args:
        image: The image to analyse, or ``None`` when nothing was supplied.
        question: The question to ask about the image. ``None``, empty or
            whitespace-only input falls back to "describe this image?".

    Returns:
        The model's answer followed by a line reporting the sizes that
        ``asizeof`` measures for the model and tokenizer objects, or a short
        prompt to upload an image when ``image`` is ``None``.
    """
    # Nothing to analyse: return early instead of failing inside the model.
    if image is None:
        return "Please upload an image."

    if not question or not question.strip():
        question = "describe this image?"

    model, tokenizer = _load_model_and_tokenizer("vikhyatk/moondream1")

    # encode_image / answer_question are not standard transformers methods;
    # presumably they come from the model repo's custom code that
    # trust_remote_code=True enables -- TODO confirm against the model card.
    enc_image = model.encode_image(image)
    # Pass the caller's question through; previously a fixed prompt was sent
    # and the user's input was silently ignored.
    answer = model.answer_question(enc_image, question, tokenizer)

    model_size = asizeof.asizeof(model)
    tokenizer_size = asizeof.asizeof(tokenizer)

    # Original author's timing note: model load and set-up = 1 min and
    # inference on CPU = 2 min.
    return answer + (
        f"\nModel size in RAM: {model_size} bytes, "
        f"Tokenizer size in RAM: {tokenizer_size} bytes"
    )
# Build the Gradio UI: an image upload (delivered to the handler as a PIL
# image) plus a free-text question box, with the handler's string shown as text.
image_input = gr.Image(type="pil")
question_input = gr.Textbox(lines=2, placeholder="Ask a question about the image...")

iface = gr.Interface(
    fn=process_image_and_question,
    inputs=[image_input, question_input],
    outputs="text",
    title="Image Question Answering",
    description="Upload an image and ask a question about it. ( 2 - 3 min response time expected )",
)

# Start serving the interface.
iface.launch()