"""Gradio chat UI that sends text, and optionally an image, to Google Gemini."""

import base64
import mimetypes
import os

import PIL.Image
import google.generativeai as genai
import gradio as gr
from dotenv import load_dotenv

load_dotenv()

# Set Google API key
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
GOOGLe_API_KEY = GOOGLE_API_KEY  # original (mis-cased) name kept as an alias
genai.configure(api_key=GOOGLE_API_KEY)

# Create the models: one for text-only prompts, one for text + image prompts
txt_model = genai.GenerativeModel('gemini-pro')
vis_model = genai.GenerativeModel('gemini-pro-vision')


def image_to_base64(image_path):
    """Read an image file and return its base64 encoded representation.

    Args:
        image_path (str): The path to the image file.

    Returns:
        str: The base64 encoded contents of the file, as UTF-8 text.
    """
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode("utf-8")


def query_message(history, txt, img):
    """Append the user's input to the chat history so the chat UI shows it.

    Args:
        history (list): Chat history as a list of (user, bot) tuples.
        txt (str): The text entered by the user.
        img (str | None): File path of the uploaded image, if any.

    Returns:
        list: The history with one new (user_message, None) entry appended.
    """
    if history is None:
        history = []

    if not img:
        history.append((txt, None))
        return history

    # Embed the image inline as a data URL. The MIME type is guessed from the
    # file name; "image/jpeg" (the previously hard-coded value) is the fallback.
    mime_type = mimetypes.guess_type(img)[0] or "image/jpeg"
    encoded = image_to_base64(img)
    data_url = f"data:{mime_type};base64,{encoded}"
    history.append((f"{txt} ![]({data_url})", None))
    return history


def llm_response(history, text, img):
    """Generate the model's reply and append it to the chat history.

    Uses the text model when no image is supplied, and the vision model when
    an image path is given.

    Args:
        history (list): Chat history as a list of (user, bot) tuples.
        text (str): The text entered by the user.
        img (str | None): File path of the uploaded image, if any.

    Returns:
        tuple: The updated history and a Gradio update that clears the text box.
    """
    if history is None:
        history = []

    if not img:
        response = txt_model.generate_content(text)
    else:
        # The context manager closes the image file once the request is done.
        with PIL.Image.open(img) as image:
            response = vis_model.generate_content([text, image])

    history.append((None, response.text))
    return history, gr.update(value="")


# Encode the logo image into base64
# NOTE(review): logo_base64 is not referenced by the markdown text below;
# presumably it was meant to be embedded as an image -- confirm.
logo_base64 = image_to_base64("pixelpk_logo.png")

# NOTE(review): the CSS below styles `h1`, but this text contains no heading
# markup -- any original markup may have been lost; confirm intended content.
markdown_content = """
Feedback Logo

MultiModal Chatbot

Multimodal chatbot is designed to chat with text and images.

"""

css = """
h1 {
    text-align: center;
    display:block;
}
"""

# Interface Code
with gr.Blocks(theme=gr.themes.Monochrome(), css=css) as app:
    # Display introductory markdown content
    gr.Markdown(f"\n{markdown_content}\n")

    with gr.Row():
        image_box = gr.Image(type="filepath")
        chatbot = gr.Chatbot(scale=3)

    text_box = gr.Textbox(
        placeholder="Enter text and press enter, or upload an image",
        container=False,
    )
    btn = gr.Button("Submit")

    # Clicking the button and pressing Enter in the text box run the same
    # pipeline: echo the user's message first, then append the model's reply.
    for trigger in (btn.click, text_box.submit):
        trigger(
            query_message,
            [chatbot, text_box, image_box],
            [chatbot],
        ).then(
            llm_response,
            [chatbot, text_box, image_box],
            [chatbot, text_box],
        )

app.queue()
app.launch(share=True, debug=True)