Does this version no longer support the model.chat method?

#1
by ss996 - opened

Does this version no longer support the model.chat method?

OpenBMB org

It is still supported; you can call model.chat exactly as in previous versions.
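For reference, here is a minimal sketch of that call pattern, following the usage shown in the MiniCPM-V-2.6 model card; the image path and question are placeholders:

```python
import torch
from PIL import Image
from transformers import AutoModel, AutoTokenizer

# trust_remote_code is required because chat() lives in the model's custom code
model = AutoModel.from_pretrained(
    "openbmb/MiniCPM-V-2_6",
    trust_remote_code=True,
    attn_implementation="sdpa",
    torch_dtype=torch.bfloat16,
).eval().cuda()
tokenizer = AutoTokenizer.from_pretrained("openbmb/MiniCPM-V-2_6", trust_remote_code=True)

# One user turn: a list mixing PIL images and the question text
image = Image.open("example.jpg").convert("RGB")  # placeholder image path
msgs = [{"role": "user", "content": [image, "What is in the image?"]}]

# model.chat is still the entry point, same as in earlier versions
answer = model.chat(image=None, msgs=msgs, tokenizer=tokenizer)
print(answer)
```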

Well, maybe it's my problem. Another question is about quantization: is it a necessary step? There is always some library warning me that there is a problem.

This is my code:

```python
import os
import torch
import sys
from transformers import AutoTokenizer, AutoModel
from torchvision.transforms.v2 import ToPILImage
from PIL import Image
from decord import VideoReader, cpu # pip install decord
from huggingface_hub import snapshot_download

# Define the MiniCPM_VQA class
class MiniCPM_VQA:
    def __init__(self):
        self.model_checkpoint = None
        self.tokenizer = None
        self.model = None
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        # bf16 needs a GPU with compute capability >= 8 (Ampere or newer)
        self.bf16_support = (
            torch.cuda.is_available()
            and torch.cuda.get_device_capability(self.device)[0] >= 8
        )

    def load_model(self, model_id):
        # Define the local model path
        models_dir = r"C:\PIP_MiniCPM3\MiniCPM-V-2_6-int4"
        print(os.path.basename(model_id))
        self.model_checkpoint = os.path.join(models_dir, "prompt_generator", os.path.basename(model_id))
        # Download the checkpoint from the Hub if it is not cached locally yet
        if not os.path.exists(self.model_checkpoint):
            snapshot_download(repo_id=model_id, local_dir=self.model_checkpoint, local_dir_use_symlinks=False)

        if self.tokenizer is None:
            self.tokenizer = AutoTokenizer.from_pretrained(self.model_checkpoint, trust_remote_code=True)
        if self.model is None:
            self.model = AutoModel.from_pretrained(
                self.model_checkpoint,
                trust_remote_code=True,
                low_cpu_mem_usage=True,
                attn_implementation="sdpa",
                torch_dtype=torch.bfloat16 if self.bf16_support else torch.float16,
            )

    def encode_image(self, image_path):
        # model.chat expects PIL images, so return the opened image in a list
        image = Image.open(image_path).convert("RGB")
        return [image]

    def inference(self, text, source_image_paths):
        # Build one user turn per image: the image followed by the question text
        msgs = []
        for image_path in source_image_paths:
            images = self.encode_image(image_path)
            msgs.append({"role": "user", "content": images + [text]})

        with torch.no_grad():
            result = self.model.chat(
                image=None,
                msgs=msgs,
                tokenizer=self.tokenizer,
                sampling=True,
                top_k=100,
                top_p=0.8,
                temperature=0.7,
                repetition_penalty=1.05,
                max_new_tokens=2048,
            )
            return result

if __name__ == "__main__":
    # Get user input
    image_paths = input("Please provide the image path(s), separated by commas: ").split(',')
    image_paths = [path.strip() for path in image_paths]

    # If the user does not provide a question, fall back to the default prompt
    user_text = input("Please provide your question (press Enter for default): ") or "Provide a detailed description of the image and output the {prompt} to me;\n\nThe requirements for the prompt are as follows:\nFirstly, I need a natural language description, which should not include category descriptions or special symbols.\nThe character should have a detailed description of appearance and attire (describe if applicable, leave blank if not). The subject may be human or non-human. If human, describe their number, gender, and age;\nThe scene description should also include all scene details and the emotional atmosphere conveyed by the image;\nAdditionally, inform me of the artistic style of the image and the shot composition;\nFinally, the order and content of the prompt output must conform to \"Character, Shot Composition, Action, Setting, Style\".\n\nPlease organize the description according to the above requirements and output the English version of the {prompt} to me, do not output any other superfluous content!"

    # Create a MiniCPM_VQA instance and load the model
    mini_cpm_vqa = MiniCPM_VQA()
    mini_cpm_vqa.load_model("openbmb/MiniCPM-V-2_6-int4")

    # Run inference and print the generated prompt
    result = mini_cpm_vqa.inference(user_text, image_paths)
    print("Generated prompt:", result)
```

Because I can't use V3, I switched back to the previous version.
