Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -6,29 +6,39 @@ import torch
|
|
6 |
from PIL import Image
|
7 |
from transformers import AutoProcessor, LlavaForConditionalGeneration
|
8 |
from transformers import TextIteratorStreamer
|
9 |
-
|
10 |
import spaces
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
|
12 |
|
13 |
PLACEHOLDER = """
|
14 |
<div style="padding: 30px; text-align: center; display: flex; flex-direction: column; align-items: center;">
|
15 |
-
<img src="https://
|
16 |
-
<h1 style="font-size: 28px; margin-bottom: 2px; opacity: 0.55;">LLaVA-
|
17 |
-
<p style="font-size: 18px; margin-bottom: 2px; opacity: 0.65;">
|
18 |
</div>
|
19 |
"""
|
20 |
|
21 |
|
22 |
model_id = "xtuner/llava-llama-3-8b-v1_1-transformers"
|
23 |
-
|
24 |
processor = AutoProcessor.from_pretrained(model_id)
|
25 |
-
|
26 |
model = LlavaForConditionalGeneration.from_pretrained(
|
27 |
model_id,
|
28 |
torch_dtype=torch.float16,
|
29 |
low_cpu_mem_usage=True,
|
30 |
)
|
31 |
-
|
32 |
model.to("cuda:0")
|
33 |
model.generation_config.eos_token_id = 128009
|
34 |
|
@@ -88,10 +98,10 @@ def bot_streaming(message, history):
|
|
88 |
|
89 |
chatbot=gr.Chatbot(placeholder=PLACEHOLDER,scale=1)
|
90 |
chat_input = gr.MultimodalTextbox(interactive=True, file_types=["image"], placeholder="Enter message or upload file...", show_label=False)
|
91 |
-
with gr.Blocks(fill_height=True, ) as demo:
|
92 |
gr.ChatInterface(
|
93 |
fn=bot_streaming,
|
94 |
-
title="LLaVA
|
95 |
examples=[{"text": "What is on the flower?", "files": ["./bee.jpg"]},
|
96 |
{"text": "How to make this pastry?", "files": ["./baklava.png"]}],
|
97 |
description="Try [LLaVA Llama-3-8B](https://huggingface.co/xtuner/llava-llama-3-8b-v1_1-transformers). Upload an image and start chatting about it, or simply try one of the examples below. If you don't upload an image, you will receive an error.",
|
@@ -99,7 +109,66 @@ with gr.Blocks(fill_height=True, ) as demo:
|
|
99 |
multimodal=True,
|
100 |
textbox=chat_input,
|
101 |
chatbot=chatbot,
|
102 |
-
)
|
103 |
-
|
104 |
-
|
105 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
from PIL import Image
|
7 |
from transformers import AutoProcessor, LlavaForConditionalGeneration
|
8 |
from transformers import TextIteratorStreamer
|
9 |
+
from datasets import load_dataset
|
10 |
import spaces
|
11 |
+
import pandas as pd
|
12 |
+
|
13 |
+
rekaeval = "RekaAI/VibeEval"
|
14 |
+
dataset = load_dataset(rekaeval, split="test")
|
15 |
+
df = pd.DataFrame(dataset)
|
16 |
+
df_markdown = df.copy()
|
17 |
+
|
18 |
+
# Function to convert URL to HTML img tag
|
19 |
+
def mediaurl_to_img_tag(url):
    """Wrap a media URL in an HTML ``<img>`` tag.

    The URL is HTML-escaped before being placed inside the ``src``
    attribute, so a value containing ``"``, ``<``, ``>`` or ``&`` cannot
    terminate the attribute early or inject extra markup.

    Args:
        url: The image location; any object is accepted and converted
            with ``str()``.

    Returns:
        A string of the form ``<img src="...">``.
    """
    # Imported locally so the helper is self-contained.
    import html

    safe_url = html.escape(str(url), quote=True)
    return f'<img src="{safe_url}">'
|
21 |
+
|
22 |
+
# Apply the function to the DataFrame column
|
23 |
+
df_markdown['media_url'] = df_markdown['media_url'].apply(mediaurl_to_img_tag)
|
24 |
|
25 |
|
26 |
# HTML passed to gr.Chatbot(placeholder=...).
# Two markup fixes: the avatar URL no longer ends in a stray ")" and the
# image opacity is expressed through `style`, because `opacity` is not an
# <img> attribute.
PLACEHOLDER = """
<div style="padding: 30px; text-align: center; display: flex; flex-direction: column; align-items: center;">
<img src="https://avatars.githubusercontent.com/u/51063788?s=400&u=479ecc9d93d8a373b5c2e69ebe846f394811e94a&v=4" style="width: 40%; opacity: 0.45;">
<h1 style="font-size: 28px; margin-bottom: 2px; opacity: 0.55;">LLaVA-Llama3-8B With REKA Vibe-Eval</h1>
<p style="font-size: 18px; margin-bottom: 2px; opacity: 0.65;">Test your Vision LLMs with new Vibe-Evals from REKA</p>
</div>
"""
|
33 |
|
34 |
|
35 |
model_id = "xtuner/llava-llama-3-8b-v1_1-transformers"
|
|
|
36 |
processor = AutoProcessor.from_pretrained(model_id)
|
|
|
37 |
model = LlavaForConditionalGeneration.from_pretrained(
|
38 |
model_id,
|
39 |
torch_dtype=torch.float16,
|
40 |
low_cpu_mem_usage=True,
|
41 |
)
|
|
|
42 |
model.to("cuda:0")
|
43 |
model.generation_config.eos_token_id = 128009
|
44 |
|
|
|
98 |
|
99 |
chatbot=gr.Chatbot(placeholder=PLACEHOLDER,scale=1)
|
100 |
chat_input = gr.MultimodalTextbox(interactive=True, file_types=["image"], placeholder="Enter message or upload file...", show_label=False)
|
101 |
+
tmp = '''with gr.Blocks(fill_height=True, ) as demo:
|
102 |
gr.ChatInterface(
|
103 |
fn=bot_streaming,
|
104 |
+
title="Testing LLaVA-Llama3-8b with Reka's Vibe-Eval",
|
105 |
examples=[{"text": "What is on the flower?", "files": ["./bee.jpg"]},
|
106 |
{"text": "How to make this pastry?", "files": ["./baklava.png"]}],
|
107 |
description="Try [LLaVA Llama-3-8B](https://huggingface.co/xtuner/llava-llama-3-8b-v1_1-transformers). Upload an image and start chatting about it, or simply try one of the examples below. If you don't upload an image, you will receive an error.",
|
|
|
109 |
multimodal=True,
|
110 |
textbox=chat_input,
|
111 |
chatbot=chatbot,
|
112 |
+
)'''
|
113 |
+
|
114 |
+
with gr.Blocks() as demo:
|
115 |
+
with gr.Row():
|
116 |
+
with gr.Column():
|
117 |
+
gr.ChatInterface(
|
118 |
+
fn=bot_streaming,
|
119 |
+
title="Testing LLaVA-Llama3-8b with Reka's Vibe-Eval",
|
120 |
+
description="Try [LLaVA Llama-3-8B](https://huggingface.co/xtuner/llava-llama-3-8b-v1_1-transformers). Upload an image and start chatting about it, or simply try one of the examples below. If you don't upload an image, you will receive an error.",
|
121 |
+
stop_btn="Stop Generation",
|
122 |
+
multimodal=True,
|
123 |
+
textbox=chat_input,
|
124 |
+
chatbot=chatbot,
|
125 |
+
)
|
126 |
+
with gr.Column():
|
127 |
+
with gr.Row():
|
128 |
+
b1 = gr.Button("Previous")
|
129 |
+
b2 = gr.Button("Next")
|
130 |
+
reka = gr.Dataframe(value=df_markdown[0:5], datatype=['markdown', 'str'], wrap=False, interactive=False, height=500)
|
131 |
+
num_start = gr.Number(visible=False, value=0)
|
132 |
+
num_end = gr.Number(visible=False, value=4)
|
133 |
+
|
134 |
+
chat_msg = chat_input.submit(add_message, [chatbot, chat_input], [chatbot, chat_input])
|
135 |
+
bot_msg = chat_msg.then(bot, chatbot, chatbot, api_name="bot_response")
|
136 |
+
bot_msg.then(lambda: gr.MultimodalTextbox(interactive=True), None, [chat_input])
|
137 |
+
chatbot.like(print_like_dislike, None, None)
|
138 |
+
|
139 |
+
def get_example(reka, evt: gr.SelectData):
    """Turn a clicked table cell into a chat-input payload.

    Args:
        reka: Value of the table component wired in as the event input;
            not read by this function.
        evt: Selection event; ``evt.index[0]`` is used as the row number.

    Returns:
        A dict with ``"text"`` (the prompt) and ``"files"`` (a one-item
        list holding the image reference), both read from the
        module-level ``df``.
    """
    # Debug trace of the raw selection event.
    print(f'evt.value = {evt.value}')
    print(f'evt.index = {evt.index}')

    row = evt.index[0]
    # NOTE(review): `row` is used as an absolute position in the full `df`.
    # If evt.index is relative to the rows currently displayed, this picks
    # the wrong example after paging with Previous/Next - confirm.
    # Assumes column 0 of `df` is the image reference and column 1 the
    # prompt - TODO confirm against the dataset schema.
    image, prompt = df.iloc[row, 0], df.iloc[row, 1]

    print(f'image = {image}')
    print(f'prompt = {prompt}')
    return {"text": prompt, "files": [image]}
|
149 |
+
|
150 |
+
def display_next(dataframe, end):
    """Return the next page (up to five rows) of ``df_markdown``.

    Unlike a plain ``end + 1 .. end + 5`` window, the result is clamped to
    the last row of ``df_markdown``: pressing "Next" on the final page
    re-shows the final rows instead of producing an empty table and
    ever-growing offsets.

    Args:
        dataframe: The table currently displayed; only its last index label
            is used, as a fallback when ``end`` is falsy.
        end: Row label of the last row currently displayed.

    Returns:
        A 4-tuple ``(rows, end, start, button)``: the slice to display, the
        new inclusive end and start labels, and a ``gr.Button`` update that
        enables the "Previous" button.
    """
    print(f'initial value of end = {end}')
    last_row = len(df_markdown) - 1
    # The stored offset may arrive as a float (presumably, since it comes
    # from a numeric input component); cast it to int before using it as a
    # label.
    start = int(end or dataframe.index[-1]) + 1
    if start > last_row:
        # Already at the end of the data: stay on the final rows.
        start = max(last_row - 4, 0)
    end = min(start + 4, last_row)
    # .loc slices by label and includes both endpoints.
    df_images = df_markdown.loc[start:end]
    print(f'returned value of end = {end}')
    print(f'returned value of start = {start}')
    return df_images, end, start, gr.Button(interactive=True)
|
158 |
+
|
159 |
+
def display_previous(dataframe, start):
    """Return the previous page (up to five rows) of ``df_markdown``.

    The page is the five rows immediately before ``start``. The earlier
    window ``start - 5 .. start`` was sliced with an inclusive ``.loc``, so
    it returned six rows and repeated the first row of the current page;
    on the first page it also produced a start of ``-1``. Both are fixed by
    computing the window as ``start - 5 .. start - 1`` and clamping at 0.

    Args:
        dataframe: The table currently displayed. Kept for interface
            compatibility with the click wiring; not read.
        start: Row label of the first row currently displayed.

    Returns:
        A 4-tuple ``(rows, end, start, button)``: the slice to display, the
        new inclusive end and start labels, and a ``gr.Button`` update that
        disables the "Previous" button when the first page is shown.
    """
    print(f'initial value of start = {start}')
    last_row = len(df_markdown) - 1
    # A falsy value (None or 0) means the first page is showing. The cast
    # guards against the stored offset arriving as a float.
    current_start = int(start or 0)
    start = max(current_start - 5, 0)
    end = min(start + 4, last_row)
    # .loc slices by label and includes both endpoints.
    df_images = df_markdown.loc[start:end]
    print(f'returned value of start = {start}')
    print(f'returned value of end = {end}')
    return df_images, end, start, gr.Button(interactive=start != 0)
|
167 |
+
|
168 |
+
reka.select(get_example, reka, chat_input, show_progress="hidden")
|
169 |
+
b2.click(fn=display_next, inputs= [reka, num_end ], outputs=[reka, num_end, num_start, b1], api_name="next_rows", show_progress=False)
|
170 |
+
b1.click(fn=display_previous, inputs= [reka, num_start ], outputs=[reka, num_end, num_start, b1], api_name="previous_rows")
|
171 |
+
|
172 |
+
|
173 |
+
demo.queue()
|
174 |
+
demo.launch(debug=True)
|