|
|
|
|
|
import gradio as gr |
|
import os |
|
import json |
|
import requests |
|
import time |
|
from concurrent.futures import ThreadPoolExecutor |
|
from utils import is_chinese, process_image_without_resize, parse_response, templates_agent_cogagent, template_grounding_cogvlm, postprocess_text |
|
|
|
# Page header rendered at the top of the demo.
DESCRIPTION = (
    "<h2 style='text-align: center'> "
    '<a href="https://github.com/THUDM/CogVLM"> CogVLM & CogAgent Chat Demo</a> '
    "</h2>"
)

# General notes shown under the header (English and Chinese).
NOTES = (
    'This app is adapted from <a href="https://github.com/THUDM/CogVLM">https://github.com/THUDM/CogVLM</a>. '
    'It would be recommended to check out the repo if you want to see the detail of our model.\n\n'
    '该demo仅作为测试使用,不支持批量请求。如有大批量需求,欢迎联系[智谱AI](mailto:[email protected])。\n\n'
    '请注意该Demo目前仅支持英文,<a href="http://36.103.203.44:7861/">备用网页</a>支持中文。'
)

# Troubleshooting hints shown at the bottom of the page.
MAINTENANCE_NOTICE1 = (
    'Hint 1: If the app report "Something went wrong, connection error out", please turn off your proxy and retry.<br>'
    'Hint 2: If you upload a large size of image like 10MB, it may take some time to upload and process. '
    'Please be patient and wait.'
)

# Hint text about the prompt format expected when "Grounding" is checked.
GROUNDING_NOTICE = (
    'Hint: When you check "Grounding", please use the '
    '<a href="https://github.com/THUDM/CogVLM/blob/main/utils/utils/template.py#L344">corresponding prompt</a>'
    ' or the examples below.'
)

# Hint text about the prompt format expected when "CogAgent" is checked.
AGENT_NOTICE = (
    'Hint: When you check "CogAgent", please use the '
    '<a href="https://github.com/THUDM/CogVLM/blob/main/utils/utils/template.py#L761C1-L761C17">corresponding prompt</a>'
    ' or the examples below.'
)

# Initial chat history: a single greeting with an empty user side.
default_chatbox = [
    ("", "Hi, What do you want to know about this image?"),
]

# Backend endpoint, read from the environment (None when the variable is unset).
URL = os.environ.get("URL")
|
|
|
|
|
def make_request(URL, headers, data):
    """POST ``data`` to ``URL`` and return the decoded JSON reply.

    Args:
        URL: Endpoint to post to.
        headers: Mapping of HTTP headers sent with the request.
        data: Request body, passed to ``requests`` unchanged.

    Returns:
        The response body parsed as JSON.

    Raises:
        requests.RequestException: On connection failure, on timeout
            (60 s to connect, 100 s to read), or when the server answers
            with an HTTP error status.
        ValueError: When the response body is not valid JSON.
    """
    response = requests.request("POST", URL, headers=headers, data=data, timeout=(60, 100))
    # Fail loudly on 4xx/5xx instead of parsing an error page as if it were a model answer.
    response.raise_for_status()
    return response.json()
|
|
|
def post(
    input_text,
    temperature,
    top_p,
    top_k,
    image_prompt,
    result_previous,
    hidden_image,
    grounding,
    cogagent,
    grounding_template,
    agent_template
):
    """Send one chat round to the backend and return the updated UI state.

    Args:
        input_text: Text the user typed.
        temperature: Sampling temperature forwarded to the backend.
        top_p: Top-p value forwarded to the backend.
        top_k: Top-k value forwarded to the backend.
        image_prompt: Image value from the UI; ``None`` means no image.
        result_previous: Current chat history as a sequence of
            ``(user, bot)`` pairs; ``None`` is treated as empty.
        hidden_image: Hash of the image used in the previous round.
        grounding: Whether the "Grounding" box is checked.
        cogagent: Whether the "CogAgent" box is checked.
        grounding_template: Template applied to the text in grounding mode.
        agent_template: Template applied to the text in agent mode; the
            value ``"do not use template"`` disables it.

    Returns:
        A 3-tuple ``(textbox_value, history, image_hash)``. The textbox value
        is ``""`` except when the image is missing, in which case the typed
        text is handed back unchanged.
    """
    # Rebuild the history as plain tuples, dropping rounds with no user
    # message (the greeting and the image-only rows added for grounding).
    result_text = [
        (ele[0], ele[1])
        for ele in (result_previous or [])
        if ele[0] is not None and ele[0] != ""
    ]
    print(f"history {result_text}")

    is_zh = is_chinese(input_text)

    if image_prompt is None:
        print("Image empty")
        if is_zh:
            result_text.append((input_text, '图片为空!请上传图片并重试。'))
        else:
            result_text.append((input_text, 'Image empty! Please upload a image and retry.'))
        return input_text, result_text, hidden_image
    elif input_text == "":
        print("Text empty")
        result_text.append((input_text, 'Text empty! Please enter text and retry.'))
        return "", result_text, hidden_image

    headers = {
        "Content-Type": "application/json; charset=UTF-8",
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36",
    }

    # Bind the image-related names up front so the grounding branch below
    # never hits an unbound variable when the image value is falsy.
    pil_img = None
    encoded_img = None
    image_path_grounding = None
    if image_prompt:
        pil_img, encoded_img, image_hash, image_path_grounding = process_image_without_resize(image_prompt)
        print(f"image_hash:{image_hash}, hidden_image_hash:{hidden_image}")

        if image_hash != hidden_image:
            if hidden_image is not None:
                # A different image was supplied: the old conversation no longer applies.
                print("image has been update")
                result_text = []
            hidden_image = image_hash

    model_use = "vlm_chat"
    if not cogagent and grounding:
        model_use = "vlm_grounding"
        if grounding_template:
            input_text = postprocess_text(grounding_template, input_text)
    elif cogagent:
        model_use = "agent_chat"
        if agent_template is not None and agent_template != "do not use template":
            input_text = postprocess_text(agent_template, input_text)

    prompt = input_text
    if grounding:
        prompt += "(with grounding)"

    print(f'request {model_use} model... with prompt {prompt}, grounding_template {grounding_template}, agent_template {agent_template}')
    data = json.dumps({
        'model_use': model_use,
        'is_grounding': grounding,
        'text': prompt,
        'history': result_text,
        'image': encoded_img,
        'temperature': temperature,
        'top_p': top_p,
        'top_k': top_k,
        'do_sample': True,
        'max_new_tokens': 2048
    })

    try:
        # The call blocks until the backend answers or the request times out.
        response = make_request(URL, headers, data)
        # Read the answer inside the guard so a malformed reply is reported
        # to the user instead of crashing the handler.
        answer = str(response['result'])
    except Exception as e:
        print("error message", e)
        if is_zh:
            result_text.append((input_text, '超时!请稍等几分钟再重试。'))
        else:
            result_text.append((input_text, 'Timeout! Please wait a few minutes and retry.'))
        return "", result_text, hidden_image
    print('request done...')

    if grounding and pil_img is not None:
        # NOTE(review): parse_response presumably renders the grounded result
        # to image_path_grounding, which is then shown as an image-only row; confirm in utils.
        parse_response(pil_img, answer, image_path_grounding)
        new_answer = answer.replace(input_text, "")
        result_text.append((input_text, new_answer))
        result_text.append((None, (image_path_grounding,)))
    else:
        result_text.append((input_text, answer))
    print(result_text)
    print('finished')
    return "", result_text, hidden_image
|
|
|
|
|
def clear_fn(value):
    """Produce the reset state for the text box, chat history and image.

    ``value`` is ignored. Returns an empty string, the default greeting
    history and ``None``, in that order.
    """
    cleared_text, cleared_image = "", None
    return cleared_text, default_chatbox, cleared_image
|
|
|
def clear_fn2(value):
    """Return the default greeting history; ``value`` is ignored."""
    fresh_history = default_chatbox
    return fresh_history
|
|
|
|
|
def main():
    """Build the Gradio interface, wire its event handlers and launch it.

    Example inputs are read from ``./examples/example_inputs.jsonl``, one
    JSON object per line with the keys ``text``, ``image``, ``grounding``
    and ``cogagent``.
    """
    gr.close_all()

    # Explicit UTF-8 keeps non-ASCII example text readable regardless of the
    # platform's default encoding; blank lines are skipped rather than
    # crashing json.loads.
    with open("./examples/example_inputs.jsonl", encoding="utf-8") as f:
        examples = [json.loads(line) for line in f if line.strip()]

    with gr.Blocks(css='style.css') as demo:

        gr.Markdown(DESCRIPTION)
        gr.Markdown(NOTES)

        with gr.Row():
            with gr.Column(scale=4.5):
                with gr.Group():
                    input_text = gr.Textbox(label='Input Text', placeholder='Please enter text prompt below and press ENTER.')
                    with gr.Row():
                        run_button = gr.Button('Generate')
                        clear_button = gr.Button('Clear')

                    image_prompt = gr.Image(type="filepath", label="Image Prompt", value=None)
                with gr.Row():
                    grounding = gr.Checkbox(label="Grounding")
                    cogagent = gr.Checkbox(label="CogAgent")
                with gr.Row():
                    grounding_template = gr.Dropdown(choices=template_grounding_cogvlm, label="Grounding Template", value=template_grounding_cogvlm[0])
                    agent_template = gr.Dropdown(choices=templates_agent_cogagent, label="Agent Template", value=templates_agent_cogagent[0])

                with gr.Row():
                    temperature = gr.Slider(maximum=1, value=0.9, minimum=0, label='Temperature')
                    top_p = gr.Slider(maximum=1, value=0.8, minimum=0, label='Top P')
                    top_k = gr.Slider(maximum=50, value=5, minimum=1, step=1, label='Top K')

            with gr.Column(scale=5.5):
                # Start from the same greeting the clear handlers restore.
                result_text = gr.components.Chatbot(label='Multi-round conversation History', value=default_chatbox, height=550)
                # Invisible field carrying the hash of the last processed image between rounds.
                hidden_image_hash = gr.Textbox(visible=False)

        gr.Examples(
            examples=[[example["text"], example["image"], example["grounding"], example["cogagent"]] for example in examples],
            inputs=[input_text, image_prompt, grounding, cogagent],
            label="Example Inputs (Click to insert an examplet into the input box)",
            examples_per_page=6,
        )

        gr.Markdown(MAINTENANCE_NOTICE1)

        print(gr.__version__)

        # The button and the ENTER key trigger the same handler with the same wiring.
        post_inputs = [input_text, temperature, top_p, top_k, image_prompt, result_text, hidden_image_hash, grounding, cogagent, grounding_template, agent_template]
        post_outputs = [input_text, result_text, hidden_image_hash]
        run_button.click(fn=post, inputs=post_inputs, outputs=post_outputs)
        input_text.submit(fn=post, inputs=post_inputs, outputs=post_outputs)

        clear_button.click(fn=clear_fn, inputs=clear_button, outputs=[input_text, result_text, image_prompt])
        # Uploading or removing an image resets the conversation to the greeting.
        image_prompt.upload(fn=clear_fn2, inputs=clear_button, outputs=[result_text])
        image_prompt.clear(fn=clear_fn2, inputs=clear_button, outputs=[result_text])

    demo.queue(concurrency_count=10)
    demo.launch()
|
|
|
# Start the demo only when this file is executed as a script.
if __name__ == "__main__":
    main()