Spaces:

Nishgop
/

cogVLM

Runtime error

App Files Files Community

Nishgop committed on Mar 2

Commit

c1e62b8

•

1 Parent(s): 76d42da

Update app.py

Browse files

Files changed (1) hide show

app.py +204 -73

app.py CHANGED Viewed

@@ -1,75 +1,206 @@
 import gradio as gr
-import torch
 import json
-from io import BytesIO
-from PIL import Image, ImageOps
-from IPython.display import display, Markdown
-from transformers import AutoModelForCausalLM, LlamaTokenizer
-from accelerate import init_empty_weights, infer_auto_device_map, load_checkpoint_and_dispatch
-# Initialize tokenizer and model
-tokenizer = LlamaTokenizer.from_pretrained('lmsys/vicuna-7b-v1.5')
-# tokenizer = LlamaTokenizer.from_pretrained('vicuna-7b-v1.5')
-model = AutoModelForCausalLM.from_pretrained(
-        'THUDM/cogvlm-chat-hf',
-        load_in_4bit=True,
-        trust_remote_code=True,
-        device_map="auto"
-    ).eval()
-def generate_description(image, query, top_p, top_k, output_length, temperature):
-    # Use the uploaded image (PIL format)
-    display_size = (224, 224)
-    image = image.resize(display_size, Image.LANCZOS)
-    # Build the conversation input
-    inputs = model.build_conversation_input_ids(tokenizer, query=query, history=[], images=[image])
-    # Prepare the inputs dictionary for model.generate()
-    inputs = {
-        'input_ids': inputs['input_ids'].unsqueeze(0).to('cuda'),
-        'token_type_ids': inputs['token_type_ids'].unsqueeze(0).to('cuda'),
-        'attention_mask': inputs['attention_mask'].unsqueeze(0).to('cuda'),
-        'images': [[inputs['images'][0].to('cuda').to(torch.float16)]],
-    }
-    # Set the generation kwargs with user-defined values
-    gen_kwargs = {
-        "max_length": output_length,
-        "do_sample": True,  # Enable sampling to use top_p, top_k, and temperature
-        "top_p": top_p,
-        "top_k": top_k,
-        "temperature": temperature
-    }
-    # Generate the description
-    with torch.no_grad():
-        outputs = model.generate(**inputs, **gen_kwargs)
-        description = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    return description
-with gr.Blocks() as app:
-    gr.Markdown("# Visual Product DNA - Image to Attribute Extractor")
-    with gr.Row():
-        with gr.Column():
-            image_input = gr.Image(label="Upload Image", type="pil", height=500)
-            gr.skip
-            query_input = gr.Textbox(label="Enter your prompt", value="Capture all attributes as JSON", lines=4)
-        with gr.Column():
-            top_p_slider = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=0.1, label="Creativity (top_p)")
-            top_k_slider = gr.Slider(minimum=0, maximum=100, step=1, value=100, label="Coherence (top_k)")
-            output_length_slider = gr.Slider(minimum=1, maximum=4096, step=1, value=2048, label="Output Length")
-            temperature_slider = gr.Slider(minimum=0.1, maximum=2.0, step=0.01, value=0.1, label="Temperature")
-            submit_button = gr.Button("Extract Attributes")
-            description_output = gr.Textbox(label="Generated JSON", lines=12)
-    submit_button.click(
-        fn=generate_description,
-        inputs=[image_input, query_input, top_p_slider, top_k_slider, output_length_slider, temperature_slider],
-        outputs=description_output
-    )
-app.launch(share=True, input = False)

+#!/usr/bin/env python
 import gradio as gr
+import os
 import json
+import requests
+import time
+from concurrent.futures import ThreadPoolExecutor
+from utils import is_chinese, process_image_without_resize, parse_response, templates_agent_cogagent, template_grounding_cogvlm, postprocess_text
+# HTML page header; rendered at the top of the demo through gr.Markdown in main().
+DESCRIPTION = '''<h2 style='text-align: center'> <a href="https://github.com/THUDM/CogVLM"> CogVLM & CogAgent Chat Demo</a> </h2>'''
+# Attribution and usage notes rendered right below the header in main().
+# NOTE(review): the mailto target reads "[email protected]", which looks like an
+# obfuscated placeholder rather than a real address - confirm the intended contact.
+NOTES = 'This app is adapted from <a href="https://github.com/THUDM/CogVLM">https://github.com/THUDM/CogVLM</a>. It would be recommended to check out the repo if you want to see the detail of our model.\n\n该demo仅作为测试使用，不支持批量请求。如有大批量需求，欢迎联系[智谱AI](mailto:[email protected])。\n\n请注意该Demo目前仅支持英文，<a href="http://36.103.203.44:7861/">备用网页</a>支持中文。'
+# Troubleshooting hints rendered below the example inputs in main().
+MAINTENANCE_NOTICE1 = 'Hint 1: If the app report "Something went wrong, connection error out", please turn off your proxy and retry.<br>Hint 2: If you upload a large size of image like 10MB, it may take some time to upload and process. Please be patient and wait.'
+# Hints for the "Grounding" / "CogAgent" modes; not rendered by the current layout in main().
+GROUNDING_NOTICE = 'Hint: When you check "Grounding", please use the <a href="https://github.com/THUDM/CogVLM/blob/main/utils/utils/template.py#L344">corresponding prompt</a> or the examples below.'
+AGENT_NOTICE = 'Hint: When you check "CogAgent", please use the <a href="https://github.com/THUDM/CogVLM/blob/main/utils/utils/template.py#L761C1-L761C17">corresponding prompt</a> or the examples below.'
+# Initial chat history (a single bot greeting); clear_fn / clear_fn2 return it to reset the chat.
+default_chatbox = [("", "Hi, What do you want to know about this image?")]
+# Inference endpoint taken from the "URL" environment variable (None when unset);
+# post() passes it to make_request().
+URL = os.environ.get("URL")
+def make_request(URL, headers, data):
+    response = requests.request("POST", URL, headers=headers, data=data, timeout=(60, 100))
+    return response.json()
+def post(
+        input_text,
+        temperature,
+        top_p,
+        top_k,
+        image_prompt,
+        result_previous,
+        hidden_image,
+        grounding,
+        cogagent,
+        grounding_template,
+        agent_template
+        ):
+    result_text = [(ele[0], ele[1]) for ele in result_previous]
+    for i in range(len(result_text)-1, -1, -1):
+        if result_text[i][0] == "" or result_text[i][0] == None:
+            del result_text[i]
+    print(f"history {result_text}")
+    is_zh = is_chinese(input_text)
+    if image_prompt is None:
+        print("Image empty")
+        if is_zh:
+            result_text.append((input_text, '图片为空！请上传图片并重试。'))
+        else:
+            result_text.append((input_text, 'Image empty! Please upload a image and retry.'))
+        return input_text, result_text, hidden_image
+    elif input_text == "":
+        print("Text empty")
+        result_text.append((input_text, 'Text empty! Please enter text and retry.'))
+        return "", result_text, hidden_image
+    headers = {
+            "Content-Type": "application/json; charset=UTF-8",
+            "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36",
+        }
+    if image_prompt:
+        pil_img, encoded_img, image_hash, image_path_grounding = process_image_without_resize(image_prompt)
+        print(f"image_hash:{image_hash}, hidden_image_hash:{hidden_image}")
+        if hidden_image is not None and image_hash != hidden_image:
+            print("image has been update")
+            result_text = []
+        hidden_image = image_hash
+    else:
+        encoded_img = None
+    model_use = "vlm_chat"
+    if not cogagent and grounding:
+        model_use = "vlm_grounding"
+        if grounding_template:
+            input_text = postprocess_text(grounding_template, input_text)
+    elif cogagent:
+        model_use = "agent_chat"
+        if agent_template is not None and agent_template != "do not use template":
+            input_text = postprocess_text(agent_template, input_text)
+    prompt = input_text
+    if grounding:
+        prompt += "(with grounding)"
+    print(f'request {model_use} model... with prompt {prompt}, grounding_template {grounding_template}, agent_template {agent_template}')
+    data = json.dumps({
+        'model_use': model_use,
+        'is_grounding': grounding,
+        'text': prompt,
+        'history': result_text,
+        'image': encoded_img,
+        'temperature': temperature,
+        'top_p': top_p,
+        'top_k': top_k,
+        'do_sample': True,
+        'max_new_tokens': 2048
+    })
+    try:
+        with ThreadPoolExecutor(max_workers=1) as executor:
+            future = executor.submit(make_request, URL, headers, data)
+            # time.sleep(15)
+            response = future.result()  # Blocks until the request is complete
+        # response = requests.request("POST", URL, headers=headers, data=data, timeout=(60, 100)).json()
+    except Exception as e:
+        print("error message", e)
+        if is_zh:
+            result_text.append((input_text, '超时！请稍等几分钟再重试。'))
+        else:
+            result_text.append((input_text, 'Timeout! Please wait a few minutes and retry.'))
+        return "", result_text, hidden_image
+    print('request done...')
+    # response = {'result':input_text}
+    answer = str(response['result'])
+    if grounding:
+        parse_response(pil_img, answer, image_path_grounding)
+        new_answer = answer.replace(input_text, "")
+        result_text.append((input_text, new_answer))
+        result_text.append((None, (image_path_grounding,)))
+    else:
+        result_text.append((input_text, answer))
+    print(result_text)
+    print('finished')
+    return "", result_text, hidden_image
+def clear_fn(value):
+    return "", default_chatbox, None
+def clear_fn2(value):
+    return default_chatbox
+def main():
+    gr.close_all()
+    examples = []
+    with open("./examples/example_inputs.jsonl") as f:
+        for line in f:
+            data = json.loads(line)
+            examples.append(data)
+    with gr.Blocks(css='style.css') as demo:
+        gr.Markdown(DESCRIPTION)
+        gr.Markdown(NOTES)
+        with gr.Row():
+            with gr.Column(scale=4.5):
+                with gr.Group():
+                    input_text = gr.Textbox(label='Input Text', placeholder='Please enter text prompt below and press ENTER.')
+                    with gr.Row():
+                        run_button = gr.Button('Generate')
+                        clear_button = gr.Button('Clear')
+                    image_prompt = gr.Image(type="filepath", label="Image Prompt", value=None)
+                with gr.Row():
+                    grounding = gr.Checkbox(label="Grounding")
+                    cogagent = gr.Checkbox(label="CogAgent")
+                with gr.Row():
+                    # grounding_notice = gr.Markdown(GROUNDING_NOTICE)
+                    grounding_template = gr.Dropdown(choices=template_grounding_cogvlm, label="Grounding Template", value=template_grounding_cogvlm[0])
+                    # agent_notice = gr.Markdown(AGENT_NOTICE)
+                    agent_template = gr.Dropdown(choices=templates_agent_cogagent, label="Agent Template", value=templates_agent_cogagent[0])
+                with gr.Row():
+                    temperature = gr.Slider(maximum=1, value=0.9, minimum=0, label='Temperature')
+                    top_p = gr.Slider(maximum=1, value=0.8, minimum=0, label='Top P')
+                    top_k = gr.Slider(maximum=50, value=5, minimum=1, step=1, label='Top K')
+            with gr.Column(scale=5.5):
+                result_text = gr.components.Chatbot(label='Multi-round conversation History', value=[("", "Hi, What do you want to know about this image?")], height=550)
+                hidden_image_hash = gr.Textbox(visible=False)
+        gr_examples = gr.Examples(examples=[[example["text"], example["image"], example["grounding"], example["cogagent"]] for example in examples],
+                                  inputs=[input_text, image_prompt, grounding, cogagent],
+                                  label="Example Inputs (Click to insert an examplet into the input box)",
+                                  examples_per_page=6)
+        gr.Markdown(MAINTENANCE_NOTICE1)
+        print(gr.__version__)
+        run_button.click(fn=post,inputs=[input_text, temperature, top_p, top_k, image_prompt, result_text, hidden_image_hash, grounding, cogagent, grounding_template, agent_template],
+                         outputs=[input_text, result_text, hidden_image_hash])
+        input_text.submit(fn=post,inputs=[input_text, temperature, top_p, top_k, image_prompt, result_text, hidden_image_hash, grounding, cogagent, grounding_template, agent_template],
+                         outputs=[input_text, result_text, hidden_image_hash])
+        clear_button.click(fn=clear_fn, inputs=clear_button, outputs=[input_text, result_text, image_prompt])
+        image_prompt.upload(fn=clear_fn2, inputs=clear_button, outputs=[result_text])
+        image_prompt.clear(fn=clear_fn2, inputs=clear_button, outputs=[result_text])
+        print(gr.__version__)
+    demo.queue(concurrency_count=10)
+    demo.launch()
+if __name__ == '__main__':
+    main()