Spaces: Running on Zero
chenzizhao committed
Commit • 87b7a45
1 Parent(s): 2f56479
cosmetics
Files changed:
- adapter.py +7 -6
- app.py +37 -48
- config_generator.py +2 -2
- utils.py +2 -2
adapter.py
CHANGED

@@ -6,6 +6,7 @@ from typing import List, Set, Tuple, TypeVar
 
 import torch
 from PIL import Image
+from transformers import Idefics2Processor, PreTrainedTokenizer
 
 from utils import device, nested_apply, sorted_list
 
@@ -55,12 +56,12 @@ class IdeficsAdapter:
         .index_fill_(0, torch.tensor(LEGAL_TOKEN_IDS), 1).to(device=device(), dtype=torch.bool)
     SUPPRESS_TOKEN_IDS = list(set(range(32003)) - set(LEGAL_TOKEN_IDS))
 
-    def __init__(self, image_folder: str, processor) -> None:
+    def __init__(self, image_folder: str, processor: Idefics2Processor) -> None:
         self.t_max_length = 2048
         self.image_folder = Path(image_folder)
         self.image_cache = {}
         self.processor = processor
-        self.tokenizer = self.processor.tokenizer
+        self.tokenizer: PreTrainedTokenizer = self.processor.tokenizer  # type: ignore
 
     def get_image(self, im_name: N) -> Image.Image:
         if im_name not in self.image_cache:
@@ -68,10 +69,10 @@ class IdeficsAdapter:
             self.image_folder.joinpath(im_name))
         return self.image_cache[im_name]
 
-    def unhash(self, context: List[N], c: str):
+    def unhash(self, context: List[N], c: str) -> N:
         return AlphabeticNameHash(tuple(context)).unhash(c)
 
-    def valid_hash(self, context: List[N], c: str):
+    def valid_hash(self, context: List[N], c: str) -> bool:
         return AlphabeticNameHash(tuple(context)).valid_hash(c)
 
     def parse(self, context: List[N], decoded_out: str,
@@ -121,13 +122,13 @@ class IdeficsAdapter:
         xs = re.search(select_pattern, last_answer)
         if xs is not None:
             xs = xs.group()
-        selections = set(xs.split(" ")[1:]) if xs else set()
+        selections: Set[N] = set(xs.split(" ")[1:]) if xs else set()
 
         deselect_pattern = r"^deselect( [A-J])+"
         xs = re.search(deselect_pattern, last_answer)
         if xs is not None:
             xs = xs.group()
-        deselections = set(xs.split(" ")[1:]) if xs else set()
+        deselections: Set[N] = set(xs.split(" ")[1:]) if xs else set()
 
         return selections, deselections
 
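The select/deselect parsing in the last hunk is easy to sanity-check in isolation. A minimal sketch, assuming the select pattern mirrors the deselect pattern shown in the diff and that last_answer holds the model's final utterance (both the pattern and the sample output are hypothetical here):

import re

last_answer = "select A C"  # hypothetical model output
select_pattern = r"^select( [A-J])+"  # assumed symmetric to the deselect pattern above
xs = re.search(select_pattern, last_answer)
if xs is not None:
    xs = xs.group()
# drop the leading verb; the single letters are the hashed image names
selections = set(xs.split(" ")[1:]) if xs else set()
print(selections)  # {'A', 'C'}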
app.py
CHANGED

@@ -3,13 +3,13 @@ import logging
 import os
 from typing import Any, Dict, List
 
-import gradio as gr
+import gradio as gr
 import PIL.Image as Image
 import PIL.ImageOps as ImageOps
-import spaces
+import spaces
 import torch
-from peft import PeftModel
-from transformers import AutoProcessor
+from peft import PeftModel
+from transformers import AutoProcessor
 from transformers import Idefics2ForConditionalGeneration, Idefics2Processor
 
 from adapter import IdeficsAdapter
@@ -18,15 +18,6 @@ from utils import device, nested_to_device, sorted_list
 import copy
 
 ### Constants
-css="""
-.radio-group .wrap {
-    display: grid;
-    grid-template-columns: repeat(5, 1fr);
-    grid-template-rows: repeat(5, 1fr);
-    width: 100%;
-    height: 100%
-}
-"""
 IMG_DIR = "tangram_pngs"
 
 
@@ -56,18 +47,18 @@ def get_model_response( # predict
 
     new_chats = chats + [chat]
     currently_selected = previous_selected[-1] if len(previous_selected) > 0 else []
-    model_input: Dict[str, Any] = adapter.compose(
+    model_input: Dict[str, Any] = adapter.compose(
        image_paths, new_chats, previous_selected, True, False)
-    model_input = nested_to_device(model_input)
+    model_input = nested_to_device(model_input)
 
     with torch.inference_mode(), torch.autocast(device_type=device().type,
                                                 dtype=torch.bfloat16):
-        model_output = model.generate(**model_input, **GEN_KWS)
+        model_output = model.generate(**model_input, **GEN_KWS)
 
-    decoded_out: str = adapter.tokenizer.decode(
+    decoded_out: str = adapter.tokenizer.decode(
         model_output.sequences[0], skip_special_tokens=True)
     model_clicks = adapter.parse(
-        image_paths, decoded_out, currently_selected)
+        image_paths, decoded_out, currently_selected)
 
     if len(model_clicks) == 0:
         logging.warning("empty clicks by model")
@@ -87,10 +78,9 @@ def get_model() -> PeftModel:
 def get_model() -> PeftModel:
     model_id = 'lil-lab/respect'
     checkpoint = "HuggingFaceM4/idefics2-8b"
-    model = Idefics2ForConditionalGeneration.from_pretrained(
-        checkpoint, torch_dtype=torch.bfloat16,
-        …
-    peft_model = PeftModel.from_pretrained(  # type: ignore
+    model = Idefics2ForConditionalGeneration.from_pretrained(
+        checkpoint, torch_dtype=torch.bfloat16,)
+    peft_model = PeftModel.from_pretrained(
         model, model_id, adapter_name="r6_bp", is_trainable=False, revision="r6_bp")
 
     # Add other adapter - hack to avoid conflict
@@ -105,10 +95,10 @@ def get_model() -> PeftModel:
 
 def get_processor() -> Idefics2Processor:
     checkpoint = "HuggingFaceM4/idefics2-8b"
-    processor = AutoProcessor.from_pretrained(
+    processor = AutoProcessor.from_pretrained(
         checkpoint, do_image_splitting=False,
         size={"longest_edge": 224, "shortest_edge": 224})
-    return processor
+    return processor
 
 def get_adapter() -> IdeficsAdapter:
     processor = get_processor()
@@ -147,7 +137,6 @@ class GameState:
         changes = self.selected_accum[-1] if len(self.selected_accum) > 0 else []
 
         tangram_list = self._display_context(context, targets, changes, selected)
-        # return [(img, f"Image {i+1}") for i, img in enumerate(tangram_list)]
         return tangram_list
 
     @staticmethod
@@ -234,30 +223,29 @@ def create_app_inner():
         To start a game, first select whether you wish to play against our \
         initial trained model ("Initial System") or \
         our model at the end of continual learning ("Final System") \
-        and press the "Start Game" button.
-        You will take on a "speaker" role at each round. \
-        Your goal is to describe this image (via a message in the textbox) \
-        so that the model can guess what it is.'
-    )
+        and press the "Start Game" button.')
 
-    gr.Markdown(
-        …
-        …
+    gr.Markdown(
+        'You will take on a "speaker" role at each round. \
+        Your goal is to describe this image (via a message in the textbox) \
+        so that the model can guess what it is.\
+        Targets have black borders. \
+        Correctly selected targets have green borders. \
+        Incorrectly selected targets have red borders. \
+        Actions are marked with yellow dot. \
+        The listener cannot see boxes or colors and the order is different.')
 
     gr.Markdown(
         '### Press "Send" to submit your action to proceed to the next turn. \
-        You have 10 turns in total.'
-    )
+        You have 10 turns in total.')
 
     with gr.Row():
         model_iteration = gr.Radio(["Initial System", "Final System"],
                                    label="Model Iteration",
                                    value="Final System")
         start_btn = gr.Button("Start Game")
-        …
-        …
-        current_turn = gr.Textbox(label="TURN")
-        success = gr.Textbox(label="Success")
+        status = gr.Textbox(label="Status", interactive=False, show_label=False,
+                            text_align="center", value="Please start a game.")
 
     with gr.Row():
         image_output = gr.Gallery(
@@ -268,9 +256,9 @@ def create_app_inner():
 
     with gr.Row():
         conversation_output = gr.Textbox(label="Interaction History")
-        …
-        …
-        …
+        with gr.Column():
+            user_input = gr.Textbox(label="Your Message as Speaker", interactive=True)
+            send_btn = gr.Button("Send", interactive=True)
 
     ### globals
     model = get_model()
@@ -280,12 +268,13 @@ def create_app_inner():
     ### callbacks
     def output_from_state(state: GameState):
         has_ended = state.has_ended()
-        success = "…
+        success = "Success" if state.has_successfully_ended() else "Failure"
+        status = f"{success} (Turn {state.turn}/10) - Start another game?" \
+            if has_ended else f"Turn {state.turn+1}/10"
         return (
             state.markup_images(), # image_output
             state.serialize_conversation(), # conversation_output
-            …
-            success if has_ended else "n/a", # success
+            status, # status
            gr.update(interactive=not has_ended, value=""), # user_input
            gr.update(interactive=not has_ended), # send_btn
            gr.update(interactive=has_ended), # model_iteration
@@ -309,7 +298,7 @@ def create_app_inner():
     start_btn.click(
         on_start_interaction,
         inputs=[model_iteration],
-        outputs=[image_output, conversation_output,
+        outputs=[image_output, conversation_output, status,
                  user_input, send_btn, model_iteration, game_state],
         queue=False
     )
@@ -317,14 +306,14 @@ def create_app_inner():
     send_btn.click(
         on_send_message,
         inputs=[user_input, game_state],
-        outputs=[image_output, conversation_output,
+        outputs=[image_output, conversation_output, status,
                  user_input, send_btn, model_iteration, game_state],
         queue=True
     )
 
 
 def create_app():
-    with gr.Blocks(…
+    with gr.Blocks(theme='saq1b/gradio-theme') as app:
         create_app_inner()
     return app
 
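The single new status textbox replaces the old current_turn/success pair, so every callback now returns one status string for it. A minimal sketch of that wiring in isolation, with the component kwargs taken from the diff and a hypothetical stand-in callback:

import gradio as gr

def on_send_message(message):
    # hypothetical stand-in: the real callback also returns the gallery,
    # conversation history, and button updates alongside the status string
    return "Turn 2/10"

with gr.Blocks() as demo:
    status = gr.Textbox(label="Status", interactive=False, show_label=False,
                        text_align="center", value="Please start a game.")
    user_input = gr.Textbox(label="Your Message as Speaker", interactive=True)
    send_btn = gr.Button("Send", interactive=True)
    send_btn.click(on_send_message, inputs=[user_input], outputs=[status])

if __name__ == "__main__":
    demo.launch()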
config_generator.py
CHANGED

@@ -35,8 +35,8 @@ def generate_game_config() -> GameConfig:
     return config
 
 @functools.cache
-def _get_data(…
-    if not …
+def _get_data(hb_split: bool=True):
+    if not hb_split:
         # 1013 images
         paths = os.listdir(EMPTY_DATA_PATH)
     else:
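One subtlety with the new signature: functools.cache keys on the exact arguments of each call, so _get_data() and _get_data(True) occupy separate cache slots even though they execute the same branch. A small sketch of that behavior, using a hypothetical stand-in function:

import functools

@functools.cache
def _get_data_sketch(hb_split: bool = True):
    print("computed with", hb_split)
    return "hb split" if hb_split else "all 1013 images"

_get_data_sketch()       # computed with True
_get_data_sketch()       # cache hit: nothing printed
_get_data_sketch(True)   # computed again: key (True,) differs from key ()
_get_data_sketch(False)  # computed with False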
utils.py
CHANGED

@@ -10,11 +10,11 @@ def sorted_list(s: Set[str]) -> List[str]:
 def device():
     return torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
-def nested_to_device(s):
+def nested_to_device(s):
     # s is either a tensor or a dictionary
     if isinstance(s, torch.Tensor):
         return s.to(device())
-    return {k: v.to(device()) for k, v in s.items()}
+    return {k: v.to(device()) for k, v in s.items()}
 
 def nested_apply(h, s):
     # h is an unary function, s is one of N, tuple of N, list of N, or set of N
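For reference, nested_to_device handles exactly the two shapes named in its comment: a bare tensor or a flat dict of tensors, as produced by adapter.compose in app.py. A self-contained usage sketch (the batch contents are made up):

import torch

def device():
    return torch.device("cuda" if torch.cuda.is_available() else "cpu")

def nested_to_device(s):
    # s is either a tensor or a dictionary of tensors (as in utils.py above)
    if isinstance(s, torch.Tensor):
        return s.to(device())
    return {k: v.to(device()) for k, v in s.items()}

batch = {"input_ids": torch.tensor([[1, 2, 3]]),
         "attention_mask": torch.ones(1, 3, dtype=torch.long)}
batch = nested_to_device(batch)  # every value now lives on device()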