The error you're encountering stems from two separate issues:

1. **`trust_remote_code` warning:** This warning is triggered because `trust_remote_code` is used in the wrong context. It only affects Auto classes (like `AutoModel` or `AutoProcessor`) and has no effect when loading the model directly with `Qwen2VLForConditionalGeneration`. You can safely remove it when loading the model. Here's the corrected model loading line:

   ```python
   model = Qwen2VLForConditionalGeneration.from_pretrained(
       "Qwen/Qwen2-VL-2B-Instruct",
       torch_dtype=torch.float32,
       device_map="cpu",
   ).eval()
   ```

2. **`enable_queue` argument in `launch`:** The `enable_queue` argument has been removed from `launch()` in recent Gradio versions, and `launch()` does not accept a `queue` keyword either. Queueing is configured by calling `demo.queue()` before `demo.launch()`; to bypass the queue for a specific event, pass `queue=False` to that event listener (for example `submit_btn.click(..., queue=False)`). Here's how to fix the `demo.launch()` call:

   ```python
   demo.launch(
       inline=False,
       server_name="0.0.0.0",
       server_port=int(os.getenv("PORT", 7860)),
       debug=True,
   )
   ```

This should resolve the issues you're encountering.
"""Final code fix: Gradio demo for Qwen2-VL-2B-Instruct running on CPU.

This is the corrected version of the demo script. Compared with the
previous revision it:

* no longer passes a ``queue`` keyword to ``demo.launch()`` (``launch`` has
  no such parameter; queueing is configured through ``demo.queue()``),
* rejects a missing image with a user-visible ``gr.Error`` instead of an
  opaque traceback,
* drops an unused RGB conversion in ``run_example``,
* uses microsecond-resolution file names so that two requests arriving in
  the same second do not overwrite each other's image.

This code should now work without the previous errors.
"""
import os
from datetime import datetime

import gradio as gr
import numpy as np
import torch
from PIL import Image
from qwen_vl_utils import process_vision_info
from transformers import AutoProcessor, Qwen2VLForConditionalGeneration

MODEL_ID = "Qwen/Qwen2-VL-2B-Instruct"


def array_to_image_path(image_array):
    """Save an image array as a PNG file and return its absolute path.

    Args:
        image_array: Array-like image data; it is cast to ``uint8`` before
            being handed to ``PIL.Image.fromarray``.

    Returns:
        Absolute path of the PNG file written to the current working
        directory.
    """
    img = Image.fromarray(np.uint8(image_array))
    # Microseconds keep names unique when several requests land in the
    # same second.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
    filename = f"image_{timestamp}.png"
    img.save(filename)
    return os.path.abspath(filename)


# Load model and processor once at import time; every request reuses them.
model = Qwen2VLForConditionalGeneration.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float32,
    device_map="cpu",
).eval()
processor = AutoProcessor.from_pretrained(MODEL_ID)

DESCRIPTION = "[Qwen2-VL-2B Demo (CPU Version)](https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct)"


def run_example(image, text_input):
    """Answer ``text_input`` about ``image`` using the Qwen2-VL model.

    Args:
        image: Image array delivered by the ``gr.Image`` component, or
            ``None`` when the user submitted without uploading a picture.
        text_input: The question typed by the user.

    Returns:
        The decoded model response as a string.

    Raises:
        gr.Error: If no image was provided.
    """
    if image is None:
        raise gr.Error("Please upload an image before submitting.")

    image_path = array_to_image_path(image)
    messages = [
        {
            "role": "user",
            "content": [
                {
                    "type": "image",
                    "image": image_path,
                },
                {"type": "text", "text": text_input},
            ],
        }
    ]

    # Preparation for inference
    text = processor.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    image_inputs, video_inputs = process_vision_info(messages)
    inputs = processor(
        text=[text],
        images=image_inputs,
        videos=video_inputs,
        padding=True,
        return_tensors="pt",
    )

    # Inference: generation of the output
    with torch.no_grad():
        generated_ids = model.generate(**inputs, max_new_tokens=128)

    # Strip the prompt tokens so only the newly generated tokens are decoded.
    generated_ids_trimmed = [
        out_ids[len(in_ids):]
        for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
    ]
    output_text = processor.batch_decode(
        generated_ids_trimmed,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )
    return output_text[0]


css = """
  #output {
    height: 500px;
    overflow: auto;
    border: 1px solid #ccc;
  }
"""

with gr.Blocks(css=css) as demo:
    gr.Markdown(DESCRIPTION)
    with gr.Tab(label="Qwen2-VL-2B Input (CPU)"):
        with gr.Row():
            with gr.Column():
                input_img = gr.Image(label="Input Picture")
                text_input = gr.Textbox(label="Question")
                submit_btn = gr.Button(value="Submit")
            with gr.Column():
                output_text = gr.Textbox(label="Output Text")

        submit_btn.click(run_example, [input_img, text_input], [output_text])

commandline_args = os.getenv("COMMANDLINE_ARGS", "")

# `launch()` has no `queue` keyword; queueing is configured via `demo.queue()`.
# When "--no-gradio-queue" is given, the explicit queue configuration is
# skipped and Gradio's defaults stay in place.
if "--no-gradio-queue" not in commandline_args:
    demo.queue(api_open=False)

demo.launch(
    inline=False,
    server_name="0.0.0.0",
    server_port=int(os.getenv("PORT", 7860)),
    debug=True,
)