import gradio as gr
import torch
from gradio.components import Dropdown, Image, Textbox
from huggingface_hub import HfApi, ModelFilter
from transformers import AutoModelForCausalLM, AutoProcessor

# Name of the model pre-selected in the UI (nothing is loaded at this point)
default_model_name = "jat-project/jat2-small-untrained"

# Cache of loaded checkpoints: model name -> (model, processor)
models = {}

# Ask the Hugging Face Hub for the jat-project models tagged "text-generation"
api = HfApi()
models_infos = api.list_models(
    author="jat-project",
    filter=ModelFilter(tags="text-generation"),
)
models_names = [info.modelId for info in models_infos]

def generate_text(model_name, input_image):
    """Yield status messages, then the caption generated for ``input_image``.

    This is a generator so that intermediate progress strings can be emitted
    before the final result.

    Args:
        model_name: Identifier of the checkpoint to use. It is loaded on first
            use and kept in the module-level ``models`` cache as a
            ``(model, processor)`` pair.
        input_image: Image to caption. ``None`` means no image was provided.

    Yields:
        ``"Loading model..."`` (only when the checkpoint is not cached yet),
        ``"Generating caption..."``, and finally the decoded caption. When
        ``input_image`` is ``None`` a single prompt asking for an image is
        yielded instead and nothing is loaded or generated.
    """
    # Nothing to caption: bail out before paying for a model load and before
    # the processor fails on a missing image.
    if input_image is None:
        yield "Please provide an input image."
        return

    # Load the checkpoint only the first time it is requested
    if model_name not in models:
        # Inform the user that the model is loading
        yield "Loading model..."

        # NOTE(review): trust_remote_code=True runs code shipped with the
        # model repository; keep model_name restricted to trusted repos.
        processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True, padding_side='left')
        model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
        models[model_name] = model, processor

    # Fetch the cached (model, processor) pair for the selected model
    model, processor = models[model_name]

    # Inform the user that the text is being generated
    yield "Generating caption..."

    # Convert the input image to a tensor
    pixel_values = processor(images=input_image, return_tensors="pt").pixel_values

    # Generate token ids (capped at max_length=100) and decode the first sequence
    generated_ids = model.generate(pixel_values=pixel_values, max_length=100, early_stopping=True)
    generated_text = processor.decode(generated_ids[0], skip_special_tokens=True)

    # Emit the generated caption as the final value
    yield generated_text

# Define the Gradio interface
iface = gr.Interface(
    fn=generate_text,  # Generator called on user input; yields status messages, then the caption
    inputs=[
        Dropdown(models_names, label="Select Model", value=default_model_name),  # Select model
        Image(label="Input Image"),  # Image input
    ],
    outputs=Textbox(label="Generated Caption"),  # Textbox to display the generated text
    title="JAT Image Captioning",  # Title of the interface
)

# Launch the Gradio interface with queuing enabled. The queue is turned on via
# .queue() because the `enable_queue` argument of launch() is deprecated in
# favour of it.
iface.queue().launch()