The messages you're seeing stem from two separate issues:

1. **`trust_remote_code` warning:**
   The warning appears because `trust_remote_code` is passed where it has no effect. The argument is only meaningful for Auto classes (such as `AutoModel` or `AutoProcessor`), which may need to download and execute custom modeling code from the Hub; when you load the model through the concrete class `Qwen2VLForConditionalGeneration`, it is simply ignored. You can safely remove it. Here is the corrected model-loading call:

   ```python
   model = Qwen2VLForConditionalGeneration.from_pretrained(
       "Qwen/Qwen2-VL-2B-Instruct",
       torch_dtype=torch.float32,
       device_map="cpu"
   ).eval()
   ```
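   For contrast, here is a minimal sketch of a case where `trust_remote_code` *does* matter: loading, through an Auto class, a model that ships custom modeling code on the Hub. The repo id below is a placeholder, not a real model:

   ```python
   from transformers import AutoModelForCausalLM

   # Hypothetical repo id, standing in for any Hub model with custom code.
   model = AutoModelForCausalLM.from_pretrained(
       "some-org/model-with-custom-code",
       trust_remote_code=True,  # lets transformers execute the repo's Python files
   )
   ```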

2. **`enable_queue` argument in `launch`:**
   The `enable_queue` argument of `launch()` was deprecated and then removed in recent Gradio versions, and there is no `queue=` keyword that replaces it. Queuing is now controlled by calling `demo.queue()` on the app before launching; to leave the queue disabled, simply don't call it. Here's how to fix the `demo.launch()` call:

   ```python
   demo.queue()  # enable the request queue; omit this line to leave it disabled
   demo.launch(inline=False, server_name="0.0.0.0", server_port=int(os.getenv("PORT", 7860)), debug=True)
   ```
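   If you want queuing to stay switchable at deploy time (as in the full script below), one pattern is to gate the `demo.queue()` call on an environment variable. This is a sketch built around the `COMMANDLINE_ARGS` variable that this particular script already reads:

   ```python
   import os

   commandline_args = os.getenv("COMMANDLINE_ARGS", "")

   # Enable the queue only when the opt-out flag is absent;
   # launch() itself no longer takes any queue-related argument.
   if "--no-gradio-queue" not in commandline_args:
       demo.queue(api_open=False)
   ```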

This should resolve the issues you're encountering. Here's the corrected code:

### Final Code Fix:

```python
import gradio as gr
import torch
from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
from qwen_vl_utils import process_vision_info
from PIL import Image
from datetime import datetime
import numpy as np
import os

# Function to save image array as a file and return the path
def array_to_image_path(image_array):
    img = Image.fromarray(np.uint8(image_array))
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"image_{timestamp}.png"
    img.save(filename)
    return os.path.abspath(filename)

# Load model and processor
model = Qwen2VLForConditionalGeneration.from_pretrained(
    "Qwen/Qwen2-VL-2B-Instruct", 
    torch_dtype=torch.float32,
    device_map="cpu"
).eval()

processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct")

DESCRIPTION = "[Qwen2-VL-2B Demo (CPU Version)](https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct)"

def run_example(image, text_input):
    # Save the array to disk; process_vision_info loads the image from this path
    image_path = array_to_image_path(image)
    messages = [
        {
            "role": "user",
            "content": [
                {
                    "type": "image",
                    "image": image_path,
                },
                {"type": "text", "text": text_input},
            ],
        }
    ]
    
    # Preparation for inference
    text = processor.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    image_inputs, video_inputs = process_vision_info(messages)
    inputs = processor(
        text=[text],
        images=image_inputs,
        videos=video_inputs,
        padding=True,
        return_tensors="pt",
    )
    
    # Inference: Generation of the output
    with torch.no_grad():
        generated_ids = model.generate(**inputs, max_new_tokens=128)
    generated_ids_trimmed = [
        out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
    ]
    output_text = processor.batch_decode(
        generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
    )
    
    return output_text[0]

css = """
  #output {
    height: 500px; 
    overflow: auto; 
    border: 1px solid #ccc; 
  }
"""

with gr.Blocks(css=css) as demo:
    gr.Markdown(DESCRIPTION)
    with gr.Tab(label="Qwen2-VL-2B Input (CPU)"):
        with gr.Row():
            with gr.Column():
                input_img = gr.Image(label="Input Picture")
                text_input = gr.Textbox(label="Question")
                submit_btn = gr.Button(value="Submit")
            with gr.Column():
                output_text = gr.Textbox(label="Output Text")

        submit_btn.click(run_example, [input_img, text_input], [output_text])

commandline_args = os.getenv("COMMANDLINE_ARGS", "")

# Enable the queue unless --no-gradio-queue is passed via COMMANDLINE_ARGS;
# launch() itself no longer accepts any queue-related argument.
if "--no-gradio-queue" not in commandline_args:
    demo.queue(api_open=False)

demo.launch(inline=False, server_name="0.0.0.0", server_port=int(os.getenv("PORT", 7860)), debug=True)
```

This code should now work without the previous errors.
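To sanity-check the pipeline without opening the UI, you can also call `run_example` directly. The image path below is only an illustration and assumes some local test image exists:

```python
# Quick smoke test (placeholder image path; any RGB image will do)
import numpy as np
from PIL import Image

arr = np.array(Image.open("test.png").convert("RGB"))
print(run_example(arr, "Describe this image."))
```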