Shanshan Wang committed
Commit 7826ae6
Parent: 73b2bf3

clean up image_state

Files changed (1): app.py (+27 -35)
app.py CHANGED
@@ -11,8 +11,6 @@ import os
 from huggingface_hub import login
 hf_token = os.environ.get('hf_token', None)
 
-# # Define the path to your model
-# path = "h2oai/h2ovl-mississippi-2b"
 
 # Define the models and their paths
 model_paths = {
@@ -45,21 +43,22 @@ def load_model_and_set_image_function(model_name):
     return model, tokenizer
 
 
-def inference(image,
+def inference(image_input,
               user_message,
               temperature,
               top_p,
               max_new_tokens,
               tile_num,
-              chatbot,state,
-              image_state,
+              chatbot,
+              state,
+              # image_state,
               model_state,
               tokenizer_state):
 
     # Check if model_state is None
     if model_state is None or tokenizer_state is None:
         chatbot.append(("System", "Please select a model to start the conversation."))
-        return chatbot, state, image_state, ""
+        return chatbot, state, ""
 
     model = model_state
     tokenizer = tokenizer_state
@@ -69,13 +68,9 @@ def inference(image,
     if chatbot is None:
         chatbot = []
 
-    if image is not None:
-        image_state = image
-    else:
-        # If image_state is None, then no image has been provided yet
-        if image_state is None:
-            chatbot.append(("System", "Please provide an image to start the conversation."))
-            return chatbot, state, image_state, ""
+    if image_input is None:
+        chatbot.append(("System", "Please provide an image to start the conversation."))
+        return chatbot, state, ""
 
     # Initialize history (state) if it's None
     if state is None:
@@ -99,7 +94,7 @@ def inference(image,
     # Call model.chat with history
     response_text, new_state = model.chat(
         tokenizer,
-        image_state,
+        image_input,
         user_message,
         max_tiles = int(tile_num),
         generation_config=generation_config,
@@ -112,7 +107,7 @@ def inference(image,
     # Update chatbot with the model's response
     chatbot[-1] = (user_message, response_text)
 
-    return chatbot, state, image_state, ""
+    return chatbot, state, ""
 
 def regenerate_response(chatbot,
                         temperature,
@@ -120,14 +115,14 @@ def regenerate_response(chatbot,
                         max_new_tokens,
                         tile_num,
                         state,
-                        image_state,
+                        image_input,
                         model_state,
                         tokenizer_state):
 
     # Check if model_state is None
     if model_state is None or tokenizer_state is None:
         chatbot.append(("System", "Please select a model to start the conversation."))
-        return chatbot, state, image_state
+        return chatbot, state
 
     model = model_state
     tokenizer = tokenizer_state
@@ -137,19 +132,19 @@ def regenerate_response(chatbot,
     if chatbot is None or len(chatbot) == 0:
         chatbot = []
         chatbot.append(("System", "Nothing to regenerate. Please start a conversation first."))
-        return chatbot, state, image_state
+        return chatbot, state,
 
     # Check if there is a previous user message
-    if state is None or image_state is None or len(state) == 0:
+    if state is None or len(state) == 0:
         chatbot.append(("System", "Nothing to regenerate. Please start a conversation first."))
-        return chatbot, state, image_state
+        return chatbot, state
 
     # Get the last user message
-    last_user_message, last_response = chatbot[-1]
+    last_user_message, _ = chatbot[-1]
 
     state = state[:-1]  # Remove last assistant's response from history
 
-    if len(state) == 0:
+    if len(state) == 0 or not state:
         state = None
     # Set generation config
     do_sample = (float(temperature) != 0.0)
@@ -164,7 +159,7 @@ def regenerate_response(chatbot,
     # Regenerate the response
     response_text, new_state = model.chat(
         tokenizer,
-        image_state,
+        image_input,
         last_user_message,
         max_tiles = int(tile_num),
         generation_config=generation_config,
@@ -178,19 +173,17 @@ def regenerate_response(chatbot,
     # Update chatbot with the regenerated response
     chatbot.append((last_user_message, response_text))
 
-    return chatbot, state, image_state
+    return chatbot, state
 
 
 def clear_all():
-    return [], None, None, None  # Clear chatbot, state, image_state, image_input
-
+    return [], None, None, ""  # Clear chatbot, state, reset image_input
 
 # Build the Gradio interface
 with gr.Blocks() as demo:
     gr.Markdown("# **H2OVL-Mississippi**")
 
     state= gr.State()
-    image_state = gr.State()
     model_state = gr.State()
     tokenizer_state = gr.State()
     image_load_function_state = gr.State()
@@ -212,12 +205,12 @@ with gr.Blocks() as demo:
         # First column with image input
         with gr.Column(scale=1):
            image_input = gr.Image(type="filepath", label="Upload an Image")
+
 
         # Second column with chatbot and user input
         with gr.Column(scale=2):
             chatbot = gr.Chatbot(label="Conversation")
             user_input = gr.Textbox(label="What is your question", placeholder="Type your message here")
-
 
             with gr.Accordion('Parameters', open=False):
                 with gr.Row():
@@ -268,11 +261,10 @@ with gr.Blocks() as demo:
             tile_num,
             chatbot,
             state,
-            image_state,
             model_state,
             tokenizer_state
         ],
-        outputs=[chatbot, state, image_state, user_input]
+        outputs=[chatbot, state, user_input]
     )
     # When the regenerate button is clicked, re-run the last inference
     regenerate_button.click(
@@ -283,18 +275,18 @@ with gr.Blocks() as demo:
             top_p_input,
             max_new_tokens_input,
             tile_num,
-            state,
-            image_state,
+            state,
+            image_input,
             model_state,
             tokenizer_state,
         ],
-        outputs=[chatbot, state, image_state]
+        outputs=[chatbot, state]
     )
 
     clear_button.click(
         fn=clear_all,
         inputs=None,
-        outputs=[chatbot, state, image_state, image_input]
+        outputs=[chatbot, state, image_input, user_input]
     )
     gr.Examples(
         examples=[
@@ -307,4 +299,4 @@ with gr.Blocks() as demo:
         label = "examples",
     )
 
-demo.launch()
+demo.launch()
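
The net effect of the commit: the gr.Image component is wired directly into the event handlers instead of being mirrored in a gr.State. A minimal, self-contained sketch of that pattern, with the model call stubbed out (the respond helper and component layout here are illustrative assumptions, not this app's real code):

import gradio as gr

def respond(image_path, message, chat, history):
    # image_path arrives straight from the gr.Image component
    # (type="filepath"), so no separate image state must be kept in sync.
    chat = chat or []
    history = history or []
    if image_path is None:
        chat.append(("System", "Please provide an image to start the conversation."))
        return chat, history, ""
    reply = f"(stub) would run the model on {image_path!r} with {message!r}"
    chat.append((message, reply))
    history.append((message, reply))
    return chat, history, ""  # the trailing "" clears the textbox

def clear_all():
    # One return value per output component resets them all in a single click.
    return [], None, None, ""  # chatbot, history state, image, textbox

with gr.Blocks() as demo:
    history = gr.State()  # conversation history only; no image mirror
    with gr.Row():
        image_input = gr.Image(type="filepath", label="Upload an Image")
        with gr.Column():
            chat = gr.Chatbot(label="Conversation")
            user_input = gr.Textbox(label="What is your question")
            clear_button = gr.Button("Clear")
    user_input.submit(
        respond,
        inputs=[image_input, user_input, chat, history],
        outputs=[chat, history, user_input],
    )
    clear_button.click(
        clear_all,
        inputs=None,
        outputs=[chat, history, image_input, user_input],
    )

demo.launch()

Because the image is read from the component at event time, clearing it is just another output assignment; no stale copy can linger in session state.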
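The regenerate_response changes lean on the same history-truncation step: keep the last user message, drop the last assistant turn, and re-run. A hedged sketch of just that step (the real app then calls model.chat on the selected h2ovl model; this stub also replaces the last chat row in place, whereas the app appends the regenerated pair):

def regenerate(chat, history, image_path):
    # Nothing to redo without a prior exchange.
    if not chat or not history:
        chat = chat or []
        chat.append(("System", "Nothing to regenerate. Please start a conversation first."))
        return chat, history
    last_user_message, _ = chat[-1]  # keep the question, discard the old answer
    history = history[:-1] or None   # drop the last turn; None means a fresh chat
    reply = f"(stub) fresh answer about {image_path!r} to {last_user_message!r}"
    chat[-1] = (last_user_message, reply)
    return chat, history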