VictorSanh committed · Commit 5011842 · Parent(s): ec50e73

    looots of fixes

Files changed: app_dialogue.py (+103 -121)
@@ -1,6 +1,5 @@
 import copy
 import hashlib
-import logging
 import os
 import re
 import torch
@@ -15,7 +14,7 @@ from PIL import Image
 import gradio as gr
 from gradio import processing_utils
 from gradio_client.client import DEFAULT_TEMP_DIR
-from transformers import AutoProcessor, AutoModelForCausalLM, TextIteratorStreamer
+from transformers import AutoProcessor, AutoModelForCausalLM, TextIteratorStreamer, logging
 
 from utils import create_model_inputs
 
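The import swap above replaces the stdlib `logging` module with the `logging` utilities shipped inside `transformers` (the stdlib `basicConfig`/`getLogger` setup is removed further down). As a rough, illustrative sketch only (not code from this commit), assuming the Space still wants a module-level logger, the `transformers` helpers could stand in like this:

```python
from transformers import logging

# Hedged sketch: roughly equivalent to the removed
# logging.basicConfig(level=logging.INFO) / logging.getLogger() pair.
logging.set_verbosity_info()            # global verbosity for transformers-managed loggers
logger = logging.get_logger(__name__)   # returns a standard logging.Logger instance

logger.info("model loading started")    # example usage
```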
@@ -27,18 +26,16 @@ MODELS = {
         trust_remote_code=True,
         torch_dtype=torch.bfloat16,
         token=os.environ["HF_AUTH_TOKEN"],
-
+        revision="1e05755c1c5cb2077a0f60b83ea1368c22a17282",
     ).to(DEVICE),
     "HuggingFaceM4/idefics2": AutoModelForCausalLM.from_pretrained(
         "HuggingFaceM4/idefics2",
         trust_remote_code=True,
         torch_dtype=torch.bfloat16,
         token=os.environ["HF_AUTH_TOKEN"],
-
+        revision="5cd3c3a3eb5e0ea664f5ac09e73c9ef42da93a86",
     ).to(DEVICE),
 }
-
-
 PROCESSOR = AutoProcessor.from_pretrained(
     "HuggingFaceM4/idefics2",
     token=os.environ["HF_AUTH_TOKEN"],
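For reference, pinning `revision` in `from_pretrained` freezes the checkpoint (and any `trust_remote_code` module) to a specific commit of the Hub repo, so later pushes to the model repo cannot silently change what the Space loads. A minimal standalone sketch of that pattern, reusing the repo name, environment variable, and one of the commit hashes from the hunk above:

```python
import os

import torch
from transformers import AutoModelForCausalLM

# Illustrative sketch of the pattern used in the diff: a pinned revision makes
# the download reproducible even if the Hub repo later receives new commits.
model = AutoModelForCausalLM.from_pretrained(
    "HuggingFaceM4/idefics2",
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,
    token=os.environ["HF_AUTH_TOKEN"],  # same env variable the Space relies on
    revision="5cd3c3a3eb5e0ea664f5ac09e73c9ef42da93a86",  # commit hash pinned above
)
```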
@@ -75,24 +72,10 @@ SYSTEM_PROMPT = [
     # """\nAssistant: There is no dogs in this image. The picture shows a tennis player jumping to volley the ball.<end_of_utterance>""",
 ]
 
-BAN_TOKENS = (  # For documentation puporse. We are not using this list, it is hardcoded inside `idefics_causal_lm.py` inside TGI.
-    "<image>;<fake_token_around_image>"
-)
-STOP_SUSPECT_LIST = []
-
 API_TOKEN = os.getenv("HF_AUTH_TOKEN")
 # IDEFICS_LOGO = "https://huggingface.co/spaces/HuggingFaceM4/idefics_playground/resolve/main/IDEFICS_logo.png"
-
-PROCESSOR = AutoProcessor.from_pretrained(
-    "HuggingFaceM4/idefics-9b-instruct",
-    token=API_TOKEN,
-)
-
 BOT_AVATAR = "IDEFICS_logo.png"
 
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger()
-
 
 # Monkey patch adapted from gradio.components.image.Image - mostly to make the `save` step optional in `pil_to_temp_file`
 def hash_bytes(bytes: bytes):
@@ -247,6 +230,25 @@ def prompt_list_to_markdown(prompt_list: List[str]) -> str:
         resulting_string += elem
     return resulting_string
 
+
+def prompt_list_to_model_input(prompt_list: List[str]) -> Tuple[str, List[Image.Image]]:
+    """
+    Create the final input string and image list to feed to the model's processor.
+    """
+    images = []
+    for idx, part in enumerate(prompt_list):
+        if is_image(part):
+            if is_url(part):
+                images.append(fetch_images([part])[0])
+            else:
+                images.append(Image.open(part))
+            prompt_list[idx] = f"{FAKE_TOK_AROUND_IMAGE}{'<image>' * IMAGE_SEQ_LEN}{FAKE_TOK_AROUND_IMAGE}"
+    input_text = "".join(prompt_list)
+    input_text = input_text.replace(FAKE_TOK_AROUND_IMAGE * 2, FAKE_TOK_AROUND_IMAGE)
+    input_text = BOS_TOKEN + input_text.strip()
+    return input_text, images
+
+
 def remove_spaces_around_token(text: str) -> str:
     pattern = r"\s*(<fake_token_around_image>)\s*"
     replacement = r"\1"
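The new `prompt_list_to_model_input` helper (reused by the two callbacks in the hunks below) fetches or opens every image part, replaces it with a run of `<image>` placeholder tokens wrapped in fake tokens, and then collapses the doubled fake token that appears between adjacent images. A self-contained sketch of that text-side behaviour, with illustrative constant values (the real `FAKE_TOK_AROUND_IMAGE`, `IMAGE_SEQ_LEN`, and `BOS_TOKEN` are defined elsewhere in `app_dialogue.py`):

```python
# Illustrative values; the real constants depend on the idefics2 processor.
FAKE_TOK_AROUND_IMAGE = "<fake_token_around_image>"
IMAGE_SEQ_LEN = 3
BOS_TOKEN = "<s>"

# Two adjacent image parts in a hypothetical prompt list.
parts = ["\nUser:", "IMG_A", "IMG_B", "Compare these two images.<end_of_utterance>\nAssistant:"]
is_image_part = [False, True, True, False]  # stand-in for is_image()

for idx, flag in enumerate(is_image_part):
    if flag:
        parts[idx] = f"{FAKE_TOK_AROUND_IMAGE}{'<image>' * IMAGE_SEQ_LEN}{FAKE_TOK_AROUND_IMAGE}"

text = "".join(parts)
# Adjacent images would otherwise yield a doubled fake token; collapse it,
# exactly as prompt_list_to_model_input does before prepending BOS.
text = text.replace(FAKE_TOK_AROUND_IMAGE * 2, FAKE_TOK_AROUND_IMAGE)
text = BOS_TOKEN + text.strip()
print(text)
```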
@@ -482,17 +484,7 @@ with gr.Blocks(title="IDEFICS Playground", theme=gr.themes.Base()) as demo:
         )
 
         # Creating model inputs
-        images = []
-        for idx, part in enumerate(formated_prompt_list):
-            if is_image(part):
-                if is_url(part):
-                    images.append(fetch_images([part])[0])
-                else:
-                    images.append(Image.open(part))
-                formated_prompt_list[idx] = f"{FAKE_TOK_AROUND_IMAGE}{'<image>' * IMAGE_SEQ_LEN}{FAKE_TOK_AROUND_IMAGE}"
-        input_text = "".join(formated_prompt_list)
-        input_text = input_text.replace(FAKE_TOK_AROUND_IMAGE * 2, FAKE_TOK_AROUND_IMAGE)
-        input_text = BOS_TOKEN + input_text
+        input_text, images = prompt_list_to_model_input(formated_prompt_list)
         inputs = create_model_inputs([input_text], [images])
         inputs = {k: v.to(DEVICE) for k, v in inputs.items()}
         generation_args.update(inputs)
@@ -558,17 +550,7 @@ with gr.Blocks(title="IDEFICS Playground", theme=gr.themes.Base()) as demo:
         )
 
         # Creating model inputs
-        images = []
-        for idx, part in enumerate(formated_prompt_list):
-            if is_image(part):
-                if is_url(part):
-                    images.append(fetch_images([part])[0])
-                else:
-                    images.append(Image.open(part))
-                formated_prompt_list[idx] = f"{FAKE_TOK_AROUND_IMAGE}{'<image>' * IMAGE_SEQ_LEN}{FAKE_TOK_AROUND_IMAGE}"
-        input_text = "".join(formated_prompt_list)
-        input_text = input_text.replace(FAKE_TOK_AROUND_IMAGE * 2, FAKE_TOK_AROUND_IMAGE)
-        input_text = BOS_TOKEN + input_text
+        input_text, images = prompt_list_to_model_input(formated_prompt_list)
         inputs = create_model_inputs([input_text], [images])
         inputs = {k: v.to(DEVICE) for k, v in inputs.items()}
         generation_args.update(inputs)
@@ -653,85 +635,85 @@ with gr.Blocks(title="IDEFICS Playground", theme=gr.themes.Base()) as demo:
     textbox.submit(lambda : gr.update(label='📁 Upload image', interactive=True), [], upload_btn)
     clear_btn.click(lambda : gr.update(label='📁 Upload image', interactive=True), [], upload_btn)
 
-    examples_path = os.path.dirname(__file__)
-    gr.Examples(
-        examples=[
-            [
-                (
-                    "Which famous person does the person in the image look like? Could you craft an engaging narrative"
-                    " featuring this character from the image as the main protagonist?"
-                ),
-                f"{examples_path}/example_images/obama-harry-potter.jpg",
-            ],
-            [
-                "Can you describe the image? Do you think it's real?",
-                f"{examples_path}/example_images/rabbit_force.png",
-            ],
-            ["Explain this meme to me.", f"{examples_path}/example_images/meme_french.jpg"],
-            ["Give me a short and easy recipe for this dish.", f"{examples_path}/example_images/recipe_burger.webp"],
-            [
-                "I want to go somewhere similar to the one in the photo. Give me destinations and travel tips.",
-                f"{examples_path}/example_images/travel_tips.jpg",
-            ],
-            [
-                "Can you name the characters in the image and give their French names?",
-                f"{examples_path}/example_images/gaulois.png",
-            ],
-            ["Write a complete sales ad for this product.", f"{examples_path}/example_images/product_ad.jpg"],
-            [
-                (
-                    "As an art critic AI assistant, could you describe this painting in details and make a thorough"
-                    " critic?"
-                ),
-                f"{examples_path}/example_images/art_critic.png",
-            ],
-            [
-                "Can you tell me a very short story based on this image?",
-                f"{examples_path}/example_images/chicken_on_money.png",
-            ],
-            ["Write 3 funny meme texts about this image.", f"{examples_path}/example_images/elon_smoking.jpg"],
-            [
-                "Who is in this picture? Why do people find it surprising?",
-                f"{examples_path}/example_images/pope_doudoune.webp",
-            ],
-            ["What are the armed baguettes guarding?", f"{examples_path}/example_images/baguettes_guarding_paris.png"],
-            ["What is this animal and why is it unusual?", f"{examples_path}/example_images/blue_dog.png"],
-            [
-                "What is this object and do you think it is horrifying?",
-                f"{examples_path}/example_images/can_horror.png",
-            ],
-            [
-                (
-                    "What is this sketch for? How would you make an argument to prove this sketch was made by Picasso"
-                    " himself?"
-                ),
-                f"{examples_path}/example_images/cat_sketch.png",
-            ],
-            ["Which celebrity does this claymation figure look like?", f"{examples_path}/example_images/kanye.jpg"],
-            ["What can you tell me about the cap in this image?", f"{examples_path}/example_images/ironman_cap.png"],
-            [
-                "Can you write an advertisement for Coca-Cola based on this image?",
-                f"{examples_path}/example_images/polar_bear_coke.png",
-            ],
-            [
-                "What is happening in this image? Which famous personality does this person in center looks like?",
-                f"{examples_path}/example_images/gandhi_selfie.jpg",
-            ],
-            [
-                "What do you think the dog is doing and is it unusual?",
-                f"{examples_path}/example_images/surfing_dog.jpg",
-            ],
-        ],
-        inputs=[textbox, imagebox],
-        outputs=[textbox, imagebox, chatbot],
-        fn=process_example,
-        cache_examples=False,
-        examples_per_page=6,
-        label=(
-            "Click on any example below to get started.\nFor convenience, the model generations have been"
-            " pre-computed with `idefics-80b-instruct`."
-        ),
-    )
+    # examples_path = os.path.dirname(__file__)
+    # gr.Examples(
+    #     examples=[
+    #         [
+    #             (
+    #                 "Which famous person does the person in the image look like? Could you craft an engaging narrative"
+    #                 " featuring this character from the image as the main protagonist?"
+    #             ),
+    #             f"{examples_path}/example_images/obama-harry-potter.jpg",
+    #         ],
+    #         [
+    #             "Can you describe the image? Do you think it's real?",
+    #             f"{examples_path}/example_images/rabbit_force.png",
+    #         ],
+    #         ["Explain this meme to me.", f"{examples_path}/example_images/meme_french.jpg"],
+    #         ["Give me a short and easy recipe for this dish.", f"{examples_path}/example_images/recipe_burger.webp"],
+    #         [
+    #             "I want to go somewhere similar to the one in the photo. Give me destinations and travel tips.",
+    #             f"{examples_path}/example_images/travel_tips.jpg",
+    #         ],
+    #         [
+    #             "Can you name the characters in the image and give their French names?",
+    #             f"{examples_path}/example_images/gaulois.png",
+    #         ],
+    #         ["Write a complete sales ad for this product.", f"{examples_path}/example_images/product_ad.jpg"],
+    #         [
+    #             (
+    #                 "As an art critic AI assistant, could you describe this painting in details and make a thorough"
+    #                 " critic?"
+    #             ),
+    #             f"{examples_path}/example_images/art_critic.png",
+    #         ],
+    #         [
+    #             "Can you tell me a very short story based on this image?",
+    #             f"{examples_path}/example_images/chicken_on_money.png",
+    #         ],
+    #         ["Write 3 funny meme texts about this image.", f"{examples_path}/example_images/elon_smoking.jpg"],
+    #         [
+    #             "Who is in this picture? Why do people find it surprising?",
+    #             f"{examples_path}/example_images/pope_doudoune.webp",
+    #         ],
+    #         ["What are the armed baguettes guarding?", f"{examples_path}/example_images/baguettes_guarding_paris.png"],
+    #         ["What is this animal and why is it unusual?", f"{examples_path}/example_images/blue_dog.png"],
+    #         [
+    #             "What is this object and do you think it is horrifying?",
+    #             f"{examples_path}/example_images/can_horror.png",
+    #         ],
+    #         [
+    #             (
+    #                 "What is this sketch for? How would you make an argument to prove this sketch was made by Picasso"
+    #                 " himself?"
+    #             ),
+    #             f"{examples_path}/example_images/cat_sketch.png",
+    #         ],
+    #         ["Which celebrity does this claymation figure look like?", f"{examples_path}/example_images/kanye.jpg"],
+    #         ["What can you tell me about the cap in this image?", f"{examples_path}/example_images/ironman_cap.png"],
+    #         [
+    #             "Can you write an advertisement for Coca-Cola based on this image?",
+    #             f"{examples_path}/example_images/polar_bear_coke.png",
+    #         ],
+    #         [
+    #             "What is happening in this image? Which famous personality does this person in center looks like?",
+    #             f"{examples_path}/example_images/gandhi_selfie.jpg",
+    #         ],
+    #         [
+    #             "What do you think the dog is doing and is it unusual?",
+    #             f"{examples_path}/example_images/surfing_dog.jpg",
+    #         ],
+    #     ],
+    #     inputs=[textbox, imagebox],
+    #     outputs=[textbox, imagebox, chatbot],
+    #     fn=process_example,
+    #     cache_examples=False,
+    #     examples_per_page=6,
+    #     label=(
+    #         "Click on any example below to get started.\nFor convenience, the model generations have been"
+    #         " pre-computed with `idefics-80b-instruct`."
+    #     ),
+    # )
 
 demo.queue(max_size=40)
 demo.launch()