import gradio as gr
from transformers import AutoModelForCausalLM, CodeGenTokenizerFast as Tokenizer
from PIL import Image
import gc
from pympler import asizeof 

# Drop a local reference to the model and force a garbage-collection pass
def clear_model(model):
    """Discard this function's reference to ``model`` and run the garbage collector.

    Only the local binding is released here. Any reference still held by the
    caller (or anywhere else) keeps the object alive, so the caller must drop
    its own references as well for the memory to be reclaimed.

    Args:
        model: Object whose local reference is discarded.
    """
    model = None  # release the only reference this function holds
    gc.collect()

_MODEL_ID = "vikhyatk/moondream1"
_DEFAULT_QUESTION = "describe this image?"
_NO_IMAGE_MESSAGE = "Please upload an image before asking a question."

# Process-wide cache so the expensive model/tokenizer load happens once
# instead of on every request.
_MODEL_CACHE = {}


def _get_model_and_tokenizer():
    """Return the cached ``(model, tokenizer)`` pair, loading it on first use."""
    if "pair" not in _MODEL_CACHE:
        # NOTE(review): trust_remote_code=True executes Python code shipped in
        # the model repository; consider pinning a specific revision.
        model = AutoModelForCausalLM.from_pretrained(_MODEL_ID, trust_remote_code=True)
        tokenizer = Tokenizer.from_pretrained(_MODEL_ID)
        # Store both together so a failed tokenizer load never leaves a
        # half-populated cache behind.
        _MODEL_CACHE["pair"] = (model, tokenizer)
    return _MODEL_CACHE["pair"]


def process_image_and_question(image, question):
    """Answer ``question`` about ``image`` and append a memory-usage footer.

    Args:
        image: Image supplied by the Gradio image input (configured with
            ``type="pil"``), or ``None`` when nothing was uploaded.
        question: Free-text question about the image. ``None``, an empty
            string, or whitespace only falls back to a generic
            "describe this image?" prompt.

    Returns:
        The model's answer followed by a footer reporting the measured
        in-memory size of the model and tokenizer. If ``image`` is ``None``,
        a short message asking the user to upload an image is returned
        instead and the model is not loaded.
    """
    # Fail fast: without an image there is nothing to encode, and loading the
    # model just to crash afterwards would waste minutes.
    if image is None:
        return _NO_IMAGE_MESSAGE

    prompt = (question or "").strip() or _DEFAULT_QUESTION

    model, tokenizer = _get_model_and_tokenizer()

    enc_image = model.encode_image(image)
    answer = model.answer_question(enc_image, prompt, tokenizer)

    model_size = asizeof.asizeof(model)
    tokenizer_size = asizeof.asizeof(tokenizer)

    # Original author's timings: model load and set-up ~1 min (first call
    # only, now that the model is cached), inference on CPU ~2 min.
    return (
        f"{answer}"
        f"\n\nExpected Ram usage: +- 9.5 gb \nModel size in RAM: {model_size} bytes, Tokenizer size in RAM: {tokenizer_size} bytes"
    )

# Define Gradio interface
# Inputs: a PIL image upload and a two-line free-text question box.
_interface_inputs = [
    gr.Image(type="pil"),
    gr.Textbox(lines=2, placeholder="Ask a question about the image..."),
]

iface = gr.Interface(
    fn=process_image_and_question,
    inputs=_interface_inputs,
    outputs="text",
    title="Image Question Answering",
    description="Upload an image and ask a question about it. ( 3 - 4 min response time expected )",
)

# Launch the interface (runs as soon as this module is executed)
iface.launch()