Shanshan Wang committed
Commit ab3d7d0 (1 parent: f588375)

cache model

Files changed (1): app.py (+66, −45)
app.py CHANGED
@@ -1,13 +1,14 @@
 import gradio as gr
-from transformers import AutoModel, AutoTokenizer, AutoImageProcessor
+from transformers import AutoModel, AutoTokenizer
 import torch
-import torchvision.transforms as T
-from PIL import Image
-import logging
-
-logging.basicConfig(level=logging.INFO)
-from torchvision.transforms.functional import InterpolationMode
+import threading
 import os
+
+# cache the model
+model_cache = {}
+tokenizer_cache = {}
+model_lock = threading.Lock()
+
 from huggingface_hub import login
 hf_token = os.environ.get('hf_token', None)
 
@@ -23,25 +24,40 @@ model_paths = {
 def load_model_and_set_image_function(model_name):
     # Get the model path from the model_paths dictionary
     model_path = model_paths[model_name]
 
-    # Load the model
-    model = AutoModel.from_pretrained(
-        model_path,
-        torch_dtype=torch.bfloat16,
-        low_cpu_mem_usage=True,
-        trust_remote_code=True,
-        use_auth_token=hf_token,
-        device_map="auto"
-    ).eval().cuda()
-
-    tokenizer = AutoTokenizer.from_pretrained(
-        model_path,
-        trust_remote_code=True,
-        use_fast=False,
-        use_auth_token=hf_token
-    )
-
-    return model, tokenizer
+    with model_lock:
+        if model_name in model_cache:
+            # model is already loaded; retrieve it from the cache
+            print(f"Model {model_name} is already loaded. Retrieving from cache.")
+        else:
+            # load the model and tokenizer
+            print(f"Loading model {model_name}...")
+            model = AutoModel.from_pretrained(
+                model_path,
+                torch_dtype=torch.bfloat16,
+                low_cpu_mem_usage=True,
+                trust_remote_code=True,
+                use_auth_token=hf_token,
+                # device_map="auto"
+            ).eval().cuda()
+
+            tokenizer = AutoTokenizer.from_pretrained(
+                model_path,
+                trust_remote_code=True,
+                use_fast=False,
+                use_auth_token=hf_token
+            )
+
+            # add the model and tokenizer to the cache
+            model_cache[model_name] = model
+            tokenizer_cache[model_name] = tokenizer
+            print(f"Model {model_name} loaded successfully.")
+
+    return model_name
 
 
 def inference(image_input,
@@ -52,22 +68,24 @@ def inference(image_input,
               tile_num,
               chatbot,
               state,
-              model_state,
-              tokenizer_state):
+              model_name):
 
     # Check if model_state is None
-    if model_state is None or tokenizer_state is None:
+    if model_name is None:
         chatbot.append(("System", "Please select a model to start the conversation."))
         return chatbot, state, ""
 
+    with model_lock:
+        if model_name not in model_cache:
+            chatbot.append(("System", "Model not loaded. Please wait for the model to load."))
+            return chatbot, state, ""
+        model = model_cache[model_name]
+        tokenizer = tokenizer_cache[model_name]
+
     # Check for empty or invalid user message
     if not user_message or user_message.strip() == '' or user_message.lower() == 'system':
         chatbot.append(("System", "Please enter a valid message to continue the conversation."))
         return chatbot, state, ""
-
-    model = model_state
-    tokenizer = tokenizer_state
 
     # if image is provided, store it in image_state:
@@ -122,13 +140,20 @@ def regenerate_response(chatbot,
                         tile_num,
                         state,
                         image_input,
-                        model_state,
-                        tokenizer_state):
+                        model_name):
 
     # Check if model_state is None
-    if model_state is None or tokenizer_state is None:
+    if model_name is None:
         chatbot.append(("System", "Please select a model to start the conversation."))
         return chatbot, state
 
+    with model_lock:
+        if model_name not in model_cache:
+            chatbot.append(("System", "Model not loaded. Please wait for the model to load."))
+            return chatbot, state
+        model = model_cache[model_name]
+        tokenizer = tokenizer_cache[model_name]
+
     # Check if there is a previous user message
     if chatbot is None or len(chatbot) == 0:
@@ -152,8 +177,6 @@ def regenerate_response(chatbot,
     else:
         state = None
 
-    model = model_state
-    tokenizer = tokenizer_state
     # Set generation config
     do_sample = (float(temperature) != 0.0)
 
@@ -195,8 +218,8 @@ with gr.Blocks() as demo:
 
     state= gr.State()
     model_state = gr.State()
-    tokenizer_state = gr.State()
-    image_load_function_state = gr.State()
+    # tokenizer_state = gr.State()
+    # image_load_function_state = gr.State()
 
     with gr.Row():
         model_dropdown = gr.Dropdown(
@@ -209,14 +232,14 @@ with gr.Blocks() as demo:
     model_dropdown.change(
         fn=load_model_and_set_image_function,
         inputs=[model_dropdown],
-        outputs=[model_state, tokenizer_state]
+        outputs=[model_state]
     )
 
     # Load the default model when the app starts
     demo.load(
         fn=load_model_and_set_image_function,
         inputs=[model_dropdown],
-        outputs=[model_state, tokenizer_state]
+        outputs=[model_state]
     )
 
     with gr.Row(equal_height=True):
@@ -282,8 +305,7 @@ with gr.Blocks() as demo:
            tile_num,
            chatbot,
            state,
-           model_state,
-           tokenizer_state
+           model_state
        ],
        outputs=[chatbot, state, user_input]
    )
@@ -298,8 +320,7 @@ with gr.Blocks() as demo:
            tile_num,
            state,
            image_input,
-           model_state,
-           tokenizer_state,
+           model_state
        ],
        outputs=[chatbot, state]
    )
@@ -319,5 +340,5 @@ with gr.Blocks() as demo:
        inputs = [image_input, user_input],
        label = "examples",
    )
-    demo.queue(concurrency_count=4,max_size=10)
-    demo.launch()
+    demo.queue()
+    demo.launch(max_threads=10)
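
The heart of the change is a process-wide cache guarded by a lock: the event handlers now pass the model name (a string) through gr.State and look the heavyweight objects up in module-level dicts, instead of storing a model and tokenizer in per-session state. A minimal, self-contained sketch of the same pattern — the get_model helper and MODEL_PATHS mapping are illustrative names, not part of the committed app:

    import threading
    import torch
    from transformers import AutoModel, AutoTokenizer

    MODEL_PATHS = {"Model-A": "org/model-a"}   # illustrative name-to-path mapping
    model_cache, tokenizer_cache = {}, {}
    model_lock = threading.Lock()

    def get_model(model_name, hf_token=None):
        """Return a cached (model, tokenizer) pair, loading it on first use."""
        with model_lock:   # serialize the check-and-load across request threads
            if model_name not in model_cache:
                path = MODEL_PATHS[model_name]
                model_cache[model_name] = AutoModel.from_pretrained(
                    path,
                    torch_dtype=torch.bfloat16,
                    low_cpu_mem_usage=True,
                    trust_remote_code=True,
                    use_auth_token=hf_token,
                ).eval().cuda()
                tokenizer_cache[model_name] = AutoTokenizer.from_pretrained(
                    path,
                    trust_remote_code=True,
                    use_fast=False,
                    use_auth_token=hf_token,
                )
            return model_cache[model_name], tokenizer_cache[model_name]

Holding one lock around both the membership test and the load means concurrent requests for the same model block until the first load finishes, at the cost of also serializing loads of different models.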
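The last hunk also reads like a Gradio 4 migration: queue() no longer accepts concurrency_count there (per-event concurrency is configured via default_concurrency_limit instead), and launch(max_threads=...) sizes the shared worker pool. Assuming Gradio 4.x, the dropped limits would map roughly to the following hypothetical equivalent:

    # hypothetical Gradio 4.x equivalent of queue(concurrency_count=4, max_size=10)
    demo.queue(max_size=10, default_concurrency_limit=4)
    demo.launch(max_threads=10)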