Phi-3-vision-128k

Running on Zero

App Files Files Community

MaziyarPanahi committed on Apr 28

Commit

d02b0d1

•

1 Parent(s): 340a6dd

Update app.py (#1)

Browse files

Files changed (1) hide show

app.py +18 -7

app.py CHANGED Viewed

@@ -1,5 +1,8 @@
 import gradio as gr
-from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration, TextIteratorStreamer
 from threading import Thread
 import re
 import time
@@ -7,9 +10,17 @@ from PIL import Image
 import torch
 import spaces
-processor = LlavaNextProcessor.from_pretrained("llava-hf/llava-v1.6-mistral-7b-hf")
-model = LlavaNextForConditionalGeneration.from_pretrained("llava-hf/llava-v1.6-mistral-7b-hf", torch_dtype=torch.float16, low_cpu_mem_usage=True)
 model.to("cuda:0")
 @spaces.GPU
@@ -26,7 +37,7 @@ def bot_streaming(message, history):
   if image is None:
       gr.Error("You need to upload an image for LLaVA to work.")
-  prompt=f"[INST] <image>\n{message['text']} [/INST]"
   image = Image.open(image).convert("RGB")
   inputs = processor(prompt, image, return_tensors="pt").to("cuda:0")
@@ -37,7 +48,7 @@ def bot_streaming(message, history):
   thread = Thread(target=model.generate, kwargs=generation_kwargs)
   thread.start()
-  text_prompt =f"[INST]  \n{message['text']} [/INST]"
   buffer = ""
@@ -50,8 +61,8 @@ def bot_streaming(message, history):
     yield generated_text_without_prompt
-demo = gr.ChatInterface(fn=bot_streaming, title="LLaVA NeXT", examples=[{"text": "What is on the flower?", "files":["./bee.jpg"]},
                                                                       {"text": "How to make this pastry?", "files":["./baklava.png"]}],
-                        description="Try [LLaVA NeXT](https://huggingface.co/docs/transformers/main/en/model_doc/llava_next) in this demo (more specifically, the [Mistral-7B variant](https://huggingface.co/llava-hf/llava-v1.6-mistral-7b-hf)). Upload an image and start chatting about it, or simply try one of the examples below. If you don't upload an image, you will receive an error.",
                         stop_btn="Stop Generation", multimodal=True)
 demo.launch(debug=True)

 import gradio as gr
+from transformers import AutoProcessor, LlavaForConditionalGeneration
+from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
 from threading import Thread
 import re
 import time
 import torch
 import spaces
+model_id = "xtuner/llava-llama-3-8b-v1_1-transformers"
+processor = AutoProcessor.from_pretrained(model_id)
+model = LlavaForConditionalGeneration.from_pretrained(
+    model_id,
+    torch_dtype=torch.float16,
+    low_cpu_mem_usage=True,
+)
 model.to("cuda:0")
 @spaces.GPU
   if image is None:
       gr.Error("You need to upload an image for LLaVA to work.")
+  prompt=f"<|start_header_id|>user<|end_header_id|>\n\n<image>\n{message['text']}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
   image = Image.open(image).convert("RGB")
   inputs = processor(prompt, image, return_tensors="pt").to("cuda:0")
   thread = Thread(target=model.generate, kwargs=generation_kwargs)
   thread.start()
+  text_prompt =f"<|start_header_id|>user<|end_header_id|>\n\n{message['text']}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
   buffer = ""
     yield generated_text_without_prompt
+demo = gr.ChatInterface(fn=bot_streaming, title="LLaVA Llama-3-8B", examples=[{"text": "What is on the flower?", "files":["./bee.jpg"]},
                                                                       {"text": "How to make this pastry?", "files":["./baklava.png"]}],
+                        description="Try [LLaVA Llama-3-8B](https://huggingface.co/xtuner/llava-llama-3-8b-v1_1-transformers). Upload an image and start chatting about it, or simply try one of the examples below. If you don't upload an image, you will receive an error.",
                         stop_btn="Stop Generation", multimodal=True)
 demo.launch(debug=True)