Update app.py
app.py CHANGED
@@ -31,6 +31,8 @@ from llava.mm_utils import (
     KeywordsStoppingCriteria,
 )
 
+from serve_constants_mm_llm import html_header
+
 from PIL import Image
 
 import requests
@@ -82,6 +84,8 @@ class InferenceDemo(object):
             conv_mode = "mpt"
         elif "qwen" in model_name.lower():
             conv_mode = "qwen_1_5"
+        elif "pangea" in model_name.lower():
+            conv_mode = "qwen_1_5"
         else:
             conv_mode = "llava_v0"
 
@@ -289,6 +293,8 @@ with gr.Blocks(
 [[Blog]](https://llava-vl.github.io/blog/2024-06-16-llava-next-interleave/) [[Code]](https://github.com/LLaVA-VL/LLaVA-NeXT) [[Model]](https://huggingface.co/lmms-lab/llava-next-interleave-7b)
 Note: The internleave checkpoint is updated (Date: Jul. 24, 2024), the wrong checkpiont is used before.
 """
+
+
 tos_markdown = """
 ### TODO!. Terms of use
 By using this service, users are required to agree to the following terms:
@@ -304,7 +310,9 @@ with gr.Blocks(
         "LLaVA-Interleave-7B",
     ]
     cur_dir = os.path.dirname(os.path.abspath(__file__))
-    gr.Markdown(title_markdown)
+    # gr.Markdown(title_markdown)
+    gr.HTML(html_header)
+
     with gr.Column():
         with gr.Row():
             chatbot = gr.Chatbot([], elem_id="chatbot", bubble_full_width=False)
@@ -327,79 +335,14 @@
         print(cur_dir)
         gr.Examples(
             examples=[
-                # [
-                #     {
-                #         "text": "<image> <image> <image> Which image shows a different mood of character from the others?",
-                #         "files": [f"{cur_dir}/examples/examples_image12.jpg", f"{cur_dir}/examples/examples_image13.jpg", f"{cur_dir}/examples/examples_image14.jpg"]
-                #     },
-                #     {
-                #         "text": "Please pay attention to the movement of the object from the first image to the second image, then write a HTML code to show this movement.",
-                #         "files": [
-                #             f"{cur_dir}/examples/code1.jpeg",
-                #             f"{cur_dir}/examples/code2.jpeg",
-                #         ],
-                #     }
-                # ],
                 [
                     {
                         "files": [
                             f"{cur_dir}/examples/shub.jpg",
-                            f"{cur_dir}/examples/shuc.jpg",
-                            f"{cur_dir}/examples/shud.jpg",
-                        ],
-                        "text": "what is fun about the images?",
-                    }
-                ],
-                [
-                    {
-                        "files": [
-                            f"{cur_dir}/examples/iphone-15-price-1024x576.jpg",
-                            f"{cur_dir}/examples/dynamic-island-1024x576.jpg",
-                            f"{cur_dir}/examples/iphone-15-colors-1024x576.jpg",
-                            f"{cur_dir}/examples/Iphone-15-Usb-c-charger-1024x576.jpg",
-                            f"{cur_dir}/examples/A-17-processors-1024x576.jpg",
-                        ],
-                        "text": "The images are the PPT of iPhone 15 review. can you summarize the main information?",
-                    }
-                ],
-                [
-                    {
-                        "files": [
-                            f"{cur_dir}/examples/fangao3.jpeg",
-                            f"{cur_dir}/examples/fangao2.jpeg",
-                            f"{cur_dir}/examples/fangao1.jpeg",
                         ],
-                        "text": "
+                        "text": "what is fun about the image?",
                     }
                 ],
-                [
-                    {
-                        "files": [
-                            f"{cur_dir}/examples/oprah-winfrey-resume.png",
-                            f"{cur_dir}/examples/steve-jobs-resume.jpg",
-                        ],
-                        "text": "Hi, there are two candidates, can you provide a brief description for each of them for me?",
-                    }
-                ],
-                [
-                    {
-                        "files": [
-                            f"{cur_dir}/examples/original_bench.jpeg",
-                            f"{cur_dir}/examples/changed_bench.jpeg",
-                        ],
-                        "text": "How to edit image1 to make it look like image2?",
-                    }
-                ],
-                [
-                    {
-                        "files": [
-                            f"{cur_dir}/examples/twitter2.jpeg",
-                            f"{cur_dir}/examples/twitter3.jpeg",
-                            f"{cur_dir}/examples/twitter4.jpeg",
-                        ],
-                        "text": "Please write a twitter blog post with the images.",
-                    }
-                ]
 
             ],
             inputs=[chat_input],
@@ -427,7 +370,7 @@ if __name__ == "__main__":
     argparser.add_argument("--server_name", default="0.0.0.0", type=str)
     argparser.add_argument("--port", default="6123", type=str)
     argparser.add_argument(
-        "--model_path", default="
+        "--model_path", default="neulab/Pangea-7B", type=str
     )
     # argparser.add_argument("--model-path", type=str, default="facebook/opt-350m")
     argparser.add_argument("--model-base", type=str, default=None)
@@ -448,4 +391,4 @@ if __name__ == "__main__":
     tokenizer, model, image_processor, context_len = load_pretrained_model(args.model_path, args.model_base, model_name, args.load_8bit, args.load_4bit)
     model=model.to(torch.device('cuda'))
     our_chatbot = None
-    demo.launch()
+    demo.launch()
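
The key functional change above is the new conversation-template branch: a model whose name contains "pangea" now reuses the Qwen-1.5 chat template, and the default --model_path becomes neulab/Pangea-7B. Below is a minimal sketch of that selection logic for illustration only; the Space assigns conv_mode inline inside InferenceDemo, and pick_conv_mode is a hypothetical helper name, not part of the app.

def pick_conv_mode(model_name: str) -> str:
    # Sketch of the branch order shown in the diff: "qwen" is tested before
    # "pangea", and "neulab/Pangea-7B".lower() only matches the new branch.
    name = model_name.lower()
    if "mpt" in name:
        return "mpt"
    elif "qwen" in name:
        return "qwen_1_5"
    elif "pangea" in name:
        # New in this commit: Pangea reuses the Qwen-1.5 conversation template.
        return "qwen_1_5"
    else:
        return "llava_v0"

assert pick_conv_mode("neulab/Pangea-7B") == "qwen_1_5"

With the new default --model_path, the model name does not contain "qwen", so selection falls through to the "pangea" branch and still yields the qwen_1_5 template.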