import gradio as gr
from gradio_client import Client

# Client for the hosted Fuyu-8B captioning demo Space
fuyu_client = Client("https://adept-fuyu-8b-demo.hf.space/")

def get_caption(image_in):
    """Ask the Fuyu-8B demo Space for a detailed caption of the image."""
    fuyu_result = fuyu_client.predict(
        image_in,  # str: filepath for the 'raw_image' Image component
        True,      # bool: 'Enable detailed captioning' Checkbox component
        fn_index=2
    )
    # Truncate at the last "." so the caption ends on a complete sentence,
    # e.g. "A dog on grass. It is brown. The dog is loo" -> "A dog on grass. It is brown."
    last_period_index = fuyu_result.rfind('.')
    truncated_caption = fuyu_result[:last_period_index + 1]
    print(f"\n—\nIMAGE CAPTION: {truncated_caption}")
    return truncated_caption

def infer(image_in):
    gr.Info("Getting image caption with Fuyu...")
    user_prompt = get_caption(image_in)
    return user_prompt

# Description shown under the title in the header HTML below
description = "Upload a picture and Fuyu-8B will caption it; the caption becomes the user prompt for your LLM system."

# Minimal styling for the centered column used by the layout below
css = """
#col-container {
    max-width: 780px;
    margin-left: auto;
    margin-right: auto;
}
"""
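# A minimal smoke test for the captioning endpoint, assuming the Fuyu Space
# above is reachable and a local test image exists ("examples/test.jpg" is a
# hypothetical path). Call it manually, e.g. from a REPL, before using the UI.
def check_caption_endpoint(image_path="examples/test.jpg"):
    caption = get_caption(image_path)
    # get_caption truncates at the last period, so a non-empty caption
    # should always end with "."
    assert caption.endswith("."), "expected a caption ending on a full sentence"
    return caption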

with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.HTML(f"""
        <h2 style="text-align: center;">LLM Agent from a Picture</h2>
        <p style="text-align: center;">{description}</p>
        """)
        with gr.Row():
            with gr.Column():
                image_in = gr.Image(
                    label="Image reference",
                    type="filepath",
                    elem_id="image-in"
                )
                submit_btn = gr.Button("Make LLM system from my pic !")
            # Output box for the generated caption / user prompt
            result = gr.Textbox(label="Image caption", elem_id="result")
        submit_btn.click(
            fn=infer,
            inputs=[image_in],
            outputs=[result]
        )

demo.launch()