Spaces:
Running
on
A10G
Running
on
A10G
Shanshan Wang
committed on
Commit
•
e809d4e
1
Parent(s):
1757eeb
added conversations and parameter options
Browse files
app.py
CHANGED
@@ -3,13 +3,15 @@ from transformers import AutoModel, AutoTokenizer, AutoImageProcessor
|
|
3 |
import torch
|
4 |
import torchvision.transforms as T
|
5 |
from PIL import Image
|
|
|
6 |
|
|
|
7 |
from torchvision.transforms.functional import InterpolationMode
|
8 |
-
# Define the path to your model
|
9 |
import os
|
10 |
from huggingface_hub import login
|
11 |
hf_token = os.environ.get('hf_token', None)
|
12 |
|
|
|
13 |
path = "h2oai/h2o-mississippi-2b"
|
14 |
|
15 |
# image preprocessing
|
@@ -174,48 +176,151 @@ tokenizer.eos_token = "<|end|>"
|
|
174 |
model.generation_config.pad_token_id = tokenizer.pad_token_id
|
175 |
|
176 |
|
177 |
-
def inference(image,
|
178 |
-
#
|
179 |
-
if
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
184 |
|
185 |
# Set generation config
|
|
|
|
|
|
|
186 |
generation_config = dict(
|
187 |
num_beams=1,
|
188 |
-
max_new_tokens=
|
189 |
-
do_sample=
|
190 |
-
temperature=temperature,
|
191 |
-
top_p=top_p,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
192 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
193 |
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
201 |
|
202 |
-
|
|
|
203 |
|
204 |
|
205 |
# Build the Gradio interface
|
206 |
with gr.Blocks() as demo:
|
207 |
-
gr.Markdown("H2O-Mississippi")
|
|
|
|
|
|
|
|
|
208 |
|
209 |
with gr.Row():
|
210 |
-
|
211 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
212 |
|
213 |
with gr.Accordion('Parameters', open=False):
|
214 |
-
|
215 |
-
|
216 |
-
|
217 |
-
|
218 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
219 |
with gr.Row():
|
220 |
submit_button = gr.Button("Submit")
|
221 |
regenerate_button = gr.Button("Regenerate")
|
@@ -225,24 +330,21 @@ with gr.Blocks() as demo:
|
|
225 |
# When the submit button is clicked, call the inference function
|
226 |
submit_button.click(
|
227 |
fn=inference,
|
228 |
-
inputs=[image_input,
|
229 |
-
outputs=
|
230 |
)
|
231 |
# When the regenerate button is clicked, re-run the last inference
|
232 |
regenerate_button.click(
|
233 |
-
fn=
|
234 |
-
inputs=[
|
235 |
-
outputs=
|
236 |
)
|
237 |
|
238 |
-
# Define the clear button action
|
239 |
-
def clear_all():
|
240 |
-
return None, "", ""
|
241 |
|
242 |
clear_button.click(
|
243 |
fn=clear_all,
|
244 |
inputs=None,
|
245 |
-
outputs=[
|
246 |
)
|
247 |
|
248 |
demo.launch()
|
|
|
3 |
import torch
|
4 |
import torchvision.transforms as T
|
5 |
from PIL import Image
|
6 |
+
import logging
|
7 |
|
8 |
+
logging.basicConfig(level=logging.INFO)
|
9 |
from torchvision.transforms.functional import InterpolationMode
|
|
|
10 |
import os
|
11 |
from huggingface_hub import login
|
12 |
hf_token = os.environ.get('hf_token', None)
|
13 |
|
14 |
+
# Define the path to your model
|
15 |
path = "h2oai/h2o-mississippi-2b"
|
16 |
|
17 |
# image preprocessing
|
|
|
176 |
model.generation_config.pad_token_id = tokenizer.pad_token_id
|
177 |
|
178 |
|
179 |
+
def inference(image, user_message, temperature, top_p, max_new_tokens, chatbot,state, image_state):
|
180 |
+
# if image is provided, store it in image_state:
|
181 |
+
if chatbot is None:
|
182 |
+
chatbot = []
|
183 |
+
|
184 |
+
if image is not None:
|
185 |
+
image_state = load_image_msac(image)
|
186 |
+
else:
|
187 |
+
# If image_state is None, then no image has been provided yet
|
188 |
+
if image_state is None:
|
189 |
+
chatbot.append(("System", "Please provide an image to start the conversation."))
|
190 |
+
return chatbot, state, image_state, ""
|
191 |
+
|
192 |
+
# Initialize history (state) if it's None
|
193 |
+
if state is None:
|
194 |
+
state = None # model.chat function handles None as empty history
|
195 |
+
|
196 |
+
# Append user message to chatbot
|
197 |
+
chatbot.append((user_message, None))
|
198 |
|
199 |
# Set generation config
|
200 |
+
do_sample = (float(temperature) != 0.0)
|
201 |
+
|
202 |
+
|
203 |
generation_config = dict(
|
204 |
num_beams=1,
|
205 |
+
max_new_tokens=int(max_new_tokens),
|
206 |
+
do_sample=do_sample,
|
207 |
+
temperature= float(temperature),
|
208 |
+
top_p= float(top_p),
|
209 |
+
)
|
210 |
+
|
211 |
+
# Call model.chat with history
|
212 |
+
response_text, new_state = model.chat(
|
213 |
+
tokenizer,
|
214 |
+
image_state,
|
215 |
+
user_message,
|
216 |
+
generation_config=generation_config,
|
217 |
+
history=state,
|
218 |
+
return_history=True
|
219 |
)
|
220 |
+
|
221 |
+
# update the state with new_state
|
222 |
+
state = new_state
|
223 |
+
# Update chatbot with the model's response
|
224 |
+
chatbot[-1] = (user_message, response_text)
|
225 |
+
|
226 |
+
return chatbot, state, image_state, ""
|
227 |
+
|
228 |
+
def regenerate_response(chatbot, temperature, top_p, max_new_tokens, state, image_state):
|
229 |
+
|
230 |
+
# Check if there is a previous user message
|
231 |
+
if chatbot is None or len(chatbot) == 0:
|
232 |
+
chatbot = []
|
233 |
+
chatbot.append(("System", "Nothing to regenerate. Please start a conversation first."))
|
234 |
+
return chatbot, state, image_state
|
235 |
+
|
236 |
+
# Check that conversation history and an image are available before regenerating
|
237 |
+
if state is None or image_state is None or len(state) == 0:
|
238 |
+
chatbot.append(("System", "Nothing to regenerate. Please start a conversation first."))
|
239 |
+
return chatbot, state, image_state
|
240 |
+
|
241 |
+
# Get the last user message
|
242 |
+
last_user_message, last_response = chatbot[-1]
|
243 |
+
|
244 |
+
state = state[:-1] # Remove last assistant's response from history
|
245 |
+
|
246 |
+
if len(state) == 0:
|
247 |
+
state = None
|
248 |
+
# Set generation config
|
249 |
+
do_sample = (float(temperature) != 0.0)
|
250 |
|
251 |
+
generation_config = dict(
|
252 |
+
num_beams=1,
|
253 |
+
max_new_tokens=int(max_new_tokens),
|
254 |
+
do_sample=do_sample,
|
255 |
+
temperature= float(temperature),
|
256 |
+
top_p= float(top_p),
|
257 |
)
|
258 |
+
# Regenerate the response
|
259 |
+
response_text, new_state = model.chat(
|
260 |
+
tokenizer,
|
261 |
+
image_state,
|
262 |
+
last_user_message,
|
263 |
+
generation_config=generation_config,
|
264 |
+
history=state, # Exclude last assistant's response
|
265 |
+
return_history=True
|
266 |
+
)
|
267 |
+
|
268 |
+
# Update the state with new_state
|
269 |
+
state = new_state
|
270 |
+
|
271 |
+
# Update chatbot with the regenerated response
|
272 |
+
chatbot.append((last_user_message, response_text))
|
273 |
+
|
274 |
+
return chatbot, state, image_state
|
275 |
+
|
276 |
|
277 |
+
def clear_all():
|
278 |
+
return [], None, None, None # Clear chatbot, state, image_state, image_input
|
279 |
|
280 |
|
281 |
# Build the Gradio interface
|
282 |
with gr.Blocks() as demo:
|
283 |
+
gr.Markdown("# **H2O-Mississippi**")
|
284 |
+
|
285 |
+
state= gr.State()
|
286 |
+
image_state = gr.State()
|
287 |
+
|
288 |
|
289 |
with gr.Row():
|
290 |
+
# First column with image input
|
291 |
+
with gr.Column(scale=1):
|
292 |
+
image_input = gr.Image(type="pil", label="Upload an Image")
|
293 |
+
|
294 |
+
# Second column with chatbot and user input
|
295 |
+
with gr.Column(scale=2):
|
296 |
+
chatbot = gr.Chatbot(label="Conversation")
|
297 |
+
user_input = gr.Textbox(label="What is your question", placeholder="Type your message here")
|
298 |
+
|
299 |
|
300 |
with gr.Accordion('Parameters', open=False):
|
301 |
+
with gr.Row():
|
302 |
+
temperature_input = gr.Slider(
|
303 |
+
minimum=0.0,
|
304 |
+
maximum=1.0,
|
305 |
+
step=0.1,
|
306 |
+
value=0.0,
|
307 |
+
interactive=True,
|
308 |
+
label="Temperature")
|
309 |
+
top_p_input = gr.Slider(
|
310 |
+
minimum=0.0,
|
311 |
+
maximum=1.0,
|
312 |
+
step=0.1,
|
313 |
+
value=0.9,
|
314 |
+
interactive=True,
|
315 |
+
label="Top P")
|
316 |
+
max_new_tokens_input = gr.Slider(
|
317 |
+
minimum=0,
|
318 |
+
maximum=4096,
|
319 |
+
step=64,
|
320 |
+
value=1024,
|
321 |
+
interactive=True,
|
322 |
+
label="Max New Tokens (default: 1024)"
|
323 |
+
)
|
324 |
with gr.Row():
|
325 |
submit_button = gr.Button("Submit")
|
326 |
regenerate_button = gr.Button("Regenerate")
|
|
|
330 |
# When the submit button is clicked, call the inference function
|
331 |
submit_button.click(
|
332 |
fn=inference,
|
333 |
+
inputs=[image_input, user_input, temperature_input, top_p_input, max_new_tokens_input, chatbot, state, image_state],
|
334 |
+
outputs=[chatbot, state, image_state, user_input]
|
335 |
)
|
336 |
# When the regenerate button is clicked, re-run the last inference
|
337 |
regenerate_button.click(
|
338 |
+
fn=regenerate_response,
|
339 |
+
inputs=[chatbot, temperature_input, top_p_input,max_new_tokens_input, state, image_state],
|
340 |
+
outputs=[chatbot, state, image_state]
|
341 |
)
|
342 |
|
|
|
|
|
|
|
343 |
|
344 |
clear_button.click(
|
345 |
fn=clear_all,
|
346 |
inputs=None,
|
347 |
+
outputs=[chatbot, state, image_state, image_input]
|
348 |
)
|
349 |
|
350 |
demo.launch()
|