Spaces:
Runtime error
Runtime error
update demo
Browse files
- app.py +7 -6
- minigemini/model/builder.py +2 -2
- minigemini/serve/gradio_web_server.py +6 -1
app.py
CHANGED
@@ -20,7 +20,7 @@ from diffusers import StableDiffusionXLPipeline
|
|
20 |
from minigemini.constants import DEFAULT_IMAGE_TOKEN, IMAGE_TOKEN_INDEX
|
21 |
from minigemini.mm_utils import process_images, load_image_from_base64, tokenizer_image_token
|
22 |
from minigemini.conversation import default_conversation, conv_templates, SeparatorStyle, Conversation
|
23 |
-
from minigemini.serve.gradio_web_server import function_markdown, tos_markdown, learn_more_markdown, title_markdown, block_css
|
24 |
from minigemini.model.builder import load_pretrained_model
|
25 |
|
26 |
# os.system('python -m pip install paddlepaddle-gpu==2.4.2.post117 -f https://www.paddlepaddle.org.cn/whl/linux/mkl/avx/stable.html')
|
@@ -35,7 +35,7 @@ def download_model(repo_id):
|
|
35 |
|
36 |
if not os.path.exists('./checkpoints/'):
|
37 |
os.makedirs('./checkpoints/')
|
38 |
-
download_model('YanweiLi/Mini-Gemini-13B-HD')
|
39 |
download_model('laion/CLIP-convnext_large_d_320.laion2B-s29B-b131K-ft-soup')
|
40 |
|
41 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
@@ -43,8 +43,8 @@ load_8bit = False
|
|
43 |
load_4bit = False
|
44 |
dtype = torch.float16
|
45 |
conv_mode = "vicuna_v1"
|
46 |
-
model_path = './checkpoints/Mini-Gemini-13B-HD'
|
47 |
-
model_name = 'Mini-Gemini-13B-HD'
|
48 |
model_base = None
|
49 |
|
50 |
tokenizer, model, image_processor, context_len = load_pretrained_model(model_path, model_base, model_name,
|
@@ -242,7 +242,7 @@ def delete_text(state, image_process_mode):
|
|
242 |
|
243 |
|
244 |
textbox = gr.Textbox(show_label=False, placeholder="Enter text and press ENTER", container=False)
|
245 |
-
with gr.Blocks(title='Mini-Gemini') as demo:
|
246 |
gr.Markdown(title_markdown)
|
247 |
state = gr.State()
|
248 |
|
@@ -272,7 +272,7 @@ with gr.Blocks(title='Mini-Gemini') as demo:
|
|
272 |
with gr.Column(scale=7):
|
273 |
chatbot = gr.Chatbot(
|
274 |
elem_id="chatbot",
|
275 |
-
label="Mini-Gemini Chatbot",
|
276 |
height=850,
|
277 |
layout="panel",
|
278 |
)
|
@@ -291,6 +291,7 @@ with gr.Blocks(title='Mini-Gemini') as demo:
|
|
291 |
gr.Markdown(function_markdown)
|
292 |
gr.Markdown(tos_markdown)
|
293 |
gr.Markdown(learn_more_markdown)
|
|
|
294 |
|
295 |
btn_list = [upvote_btn, downvote_btn, flag_btn, regenerate_btn, clear_btn]
|
296 |
upvote_btn.click(
|
|
|
20 |
from minigemini.constants import DEFAULT_IMAGE_TOKEN, IMAGE_TOKEN_INDEX
|
21 |
from minigemini.mm_utils import process_images, load_image_from_base64, tokenizer_image_token
|
22 |
from minigemini.conversation import default_conversation, conv_templates, SeparatorStyle, Conversation
|
23 |
+
from minigemini.serve.gradio_web_server import function_markdown, tos_markdown, learn_more_markdown, title_markdown, ack_markdown, block_css
|
24 |
from minigemini.model.builder import load_pretrained_model
|
25 |
|
26 |
# os.system('python -m pip install paddlepaddle-gpu==2.4.2.post117 -f https://www.paddlepaddle.org.cn/whl/linux/mkl/avx/stable.html')
|
|
|
35 |
|
36 |
if not os.path.exists('./checkpoints/'):
|
37 |
os.makedirs('./checkpoints/')
|
38 |
+
download_model('YanweiLi/MGM-13B-HD')
|
39 |
download_model('laion/CLIP-convnext_large_d_320.laion2B-s29B-b131K-ft-soup')
|
40 |
|
41 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
|
|
43 |
load_4bit = False
|
44 |
dtype = torch.float16
|
45 |
conv_mode = "vicuna_v1"
|
46 |
+
model_path = './checkpoints/MGM-13B-HD'
|
47 |
+
model_name = 'MGM-13B-HD'
|
48 |
model_base = None
|
49 |
|
50 |
tokenizer, model, image_processor, context_len = load_pretrained_model(model_path, model_base, model_name,
|
|
|
242 |
|
243 |
|
244 |
textbox = gr.Textbox(show_label=False, placeholder="Enter text and press ENTER", container=False)
|
245 |
+
with gr.Blocks(title='MGM') as demo:
|
246 |
gr.Markdown(title_markdown)
|
247 |
state = gr.State()
|
248 |
|
|
|
272 |
with gr.Column(scale=7):
|
273 |
chatbot = gr.Chatbot(
|
274 |
elem_id="chatbot",
|
275 |
+
label="MGM Chatbot",
|
276 |
height=850,
|
277 |
layout="panel",
|
278 |
)
|
|
|
291 |
gr.Markdown(function_markdown)
|
292 |
gr.Markdown(tos_markdown)
|
293 |
gr.Markdown(learn_more_markdown)
|
294 |
+
gr.Markdown(ack_markdown)
|
295 |
|
296 |
btn_list = [upvote_btn, downvote_btn, flag_btn, regenerate_btn, clear_btn]
|
297 |
upvote_btn.click(
|
minigemini/model/builder.py
CHANGED
@@ -49,7 +49,7 @@ def load_pretrained_model(model_path, model_base, model_name, load_8bit=False, l
|
|
49 |
|
50 |
logging.getLogger("transformers").setLevel(logging.ERROR)
|
51 |
|
52 |
-
if 'mini-gemini' in model_name.lower():
|
53 |
# Load MiniGemini model
|
54 |
if model_base is not None:
|
55 |
# this may be mm projector only
|
@@ -116,7 +116,7 @@ def load_pretrained_model(model_path, model_base, model_name, load_8bit=False, l
|
|
116 |
vision_tower.to(device=device, dtype=torch.float16)
|
117 |
image_processor = vision_tower.image_processor
|
118 |
|
119 |
-
if 'mini-gemini' in model_name.lower():
|
120 |
vision_tower_aux = model.get_vision_tower_aux()
|
121 |
if not vision_tower_aux.is_loaded:
|
122 |
vision_tower_aux.load_model()
|
|
|
49 |
|
50 |
logging.getLogger("transformers").setLevel(logging.ERROR)
|
51 |
|
52 |
+
if 'mgm' in model_name.lower():
|
53 |
# Load MiniGemini model
|
54 |
if model_base is not None:
|
55 |
# this may be mm projector only
|
|
|
116 |
vision_tower.to(device=device, dtype=torch.float16)
|
117 |
image_processor = vision_tower.image_processor
|
118 |
|
119 |
+
if 'mgm' in model_name.lower():
|
120 |
vision_tower_aux = model.get_vision_tower_aux()
|
121 |
if not vision_tower_aux.is_loaded:
|
122 |
vision_tower_aux.load_model()
|
minigemini/serve/gradio_web_server.py
CHANGED
@@ -280,7 +280,7 @@ def http_bot(state, model_selector, temperature, top_p, max_new_tokens, gen_imag
|
|
280 |
fout.write(json.dumps(data) + "\n")
|
281 |
|
282 |
title_markdown = ("""
|
283 |
-
# Mini-Gemini: Mining the Potential of Multi-modality Vision Language Models
|
284 |
[[Project Page]](https://mini-gemini.github.io/) [[Paper]](https://arxiv.org/abs/2403.18814) [[Code]](https://github.com/dvlab-research/MiniGemini) [[Model]](https://huggingface.co/collections/YanweiLi/mini-gemini-6603c50b9b43d044171d0854) [[Data]](https://huggingface.co/collections/YanweiLi/mini-gemini-data-660463ea895a01d8f367624e) <br>
|
285 |
This is Mini-Gemini-13B-HD version. The Mini-Gemini-34B-HD is deployed on [[here]](http://10.81.134.110:7860/)
|
286 |
""")
|
@@ -304,6 +304,11 @@ learn_more_markdown = ("""
|
|
304 |
The service is a research preview intended for non-commercial use only, subject to the model [License](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) of LLaMA, [Terms of Use](https://openai.com/policies/terms-of-use) of the data generated by OpenAI, and [Privacy Practices](https://chrome.google.com/webstore/detail/sharegpt-share-your-chatg/daiacboceoaocpibfodeljbdfacokfjb) of ShareGPT. Please contact us if you find any potential violation.
|
305 |
""")
|
306 |
|
|
|
|
|
|
|
|
|
|
|
307 |
block_css = """
|
308 |
|
309 |
#buttons button {
|
|
|
280 |
fout.write(json.dumps(data) + "\n")
|
281 |
|
282 |
title_markdown = ("""
|
283 |
+
# Official demo for the paper "Mini-Gemini: Mining the Potential of Multi-modality Vision Language Models
|
284 |
[[Project Page]](https://mini-gemini.github.io/) [[Paper]](https://arxiv.org/abs/2403.18814) [[Code]](https://github.com/dvlab-research/MiniGemini) [[Model]](https://huggingface.co/collections/YanweiLi/mini-gemini-6603c50b9b43d044171d0854) [[Data]](https://huggingface.co/collections/YanweiLi/mini-gemini-data-660463ea895a01d8f367624e) <br>
|
285 |
This is Mini-Gemini-13B-HD version. The Mini-Gemini-34B-HD is deployed on [[here]](http://10.81.134.110:7860/)
|
286 |
""")
|
|
|
304 |
The service is a research preview intended for non-commercial use only, subject to the model [License](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) of LLaMA, [Terms of Use](https://openai.com/policies/terms-of-use) of the data generated by OpenAI, and [Privacy Practices](https://chrome.google.com/webstore/detail/sharegpt-share-your-chatg/daiacboceoaocpibfodeljbdfacokfjb) of ShareGPT. Please contact us if you find any potential violation.
|
305 |
""")
|
306 |
|
307 |
+
ack_markdown = ("""
|
308 |
+
### Acknowledgement
|
309 |
+
This project is not affiliated with Google LLC.
|
310 |
+
""")
|
311 |
+
|
312 |
block_css = """
|
313 |
|
314 |
#buttons button {
|