Batch inference bug: all outputs in the same batch have the same prediction, even for different images
#8
by
weihf
- opened
The model is great, but batch inference has a problem: even when the batch contains different images, every output is the same. It seems to run inference on only a single image.
This is my test code:
from transformers import AutoProcessor, Kosmos2_5ForConditionalGeneration
from PIL import Image
import torch
from tqdm import tqdm
import os
# Passed as torch_dtype to from_pretrained() in initialize_model.
DTYPE = torch.bfloat16
def initialize_model(model_name: str):
    """Load the Kosmos-2.5 model and its matching processor.

    Args:
        model_name: Checkpoint identifier handed to ``from_pretrained`` for
            both the model and the processor.

    Returns:
        A ``(model, processor)`` tuple.
    """
    print(f"Initializing {model_name} model")
    # Keep the loading options together so they are easy to tweak in one place.
    load_kwargs = {"device_map": "auto", "torch_dtype": DTYPE}
    loaded_model = Kosmos2_5ForConditionalGeneration.from_pretrained(
        model_name,
        **load_kwargs,
    )
    loaded_processor = AutoProcessor.from_pretrained(model_name)
    return loaded_model, loaded_processor
def _load_image(path):
    """Read the image at ``path`` fully into memory and release its file handle."""
    with Image.open(path) as img:
        # Image.open() is lazy; copy() forces the decode and returns an image
        # that no longer depends on the (about to be closed) file.
        return img.copy()


def test_kosmos_batch(
    model,
    processor,
    image_dir,
    prompt="<md>",
    batch_size=1,
    device="cuda",
    dtype=torch.bfloat16,
    max_new_tokens=1024,
):
    """Run batched generation over every image file in ``image_dir``.

    Images are processed in sorted path order, ``batch_size`` at a time, each
    paired with the same ``prompt``. The decoded text for every image is
    printed and returned.

    Args:
        model: Object exposing ``generate(**inputs, max_new_tokens=...)``.
        processor: Callable that builds model inputs from ``text``/``images``
            and exposes ``batch_decode``.
        image_dir: Directory whose regular files are treated as images.
        prompt: Text prompt repeated for every image in a batch.
        batch_size: Number of images per ``generate`` call; must be >= 1.
        device: Device every processor output tensor is moved to.
        dtype: Dtype that ``flattened_patches`` is cast to.
        max_new_tokens: Forwarded to ``model.generate``.

    Returns:
        List of decoded strings, one per image, in sorted path order.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

    # os.listdir() returns entries in arbitrary order; sorting makes the
    # "Output i" index map to a predictable file. Sub-directories are skipped
    # because they cannot be opened as images.
    candidates = (os.path.join(image_dir, name) for name in os.listdir(image_dir))
    image_paths = sorted(path for path in candidates if os.path.isfile(path))

    outputs = []
    for start in tqdm(range(0, len(image_paths), batch_size)):
        batch_paths = image_paths[start : start + batch_size]
        images = [_load_image(path) for path in batch_paths]
        inputs = processor(
            text=[prompt] * len(images),
            images=images,
            return_tensors="pt",
            padding=True,
        )
        inputs = {k: v.to(device) if v is not None else None for k, v in inputs.items()}
        inputs["flattened_patches"] = inputs["flattened_patches"].to(dtype)
        print("Processing inputs completed")
        # Remove each key independently: a shared try/except would leave
        # "height" in place whenever "width" happens to be missing.
        # (Presumably generate() does not accept these keys - TODO confirm.)
        inputs.pop("width", None)
        inputs.pop("height", None)
        generated_ids = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
        )
        generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)
        outputs.extend(generated_text)

    for index, output in enumerate(outputs):
        print("=========================================")
        print(f"Output {index}: {output}")
    return outputs
if __name__ == "__main__":
    # Load the checkpoint once, then run a short batched smoke test over the
    # images in the local ./images directory.
    model_name = "microsoft/kosmos-2.5"
    model, processor = initialize_model(model_name)
    print("Model initialized successfully!")
    image_dir = "./images"
    test_kosmos_batch(
        model,
        processor,
        image_dir,
        batch_size=3,
        max_new_tokens=50,
    )
All three outputs describe the same image, which should not be the case:
=========================================
Output 0: <md>**Section 10.** **Effectivity.** This Circular shall take effect fifteen (15) calendar days following its publication either in the Official Gazette or in a newspaper of general circulation.
# Classification: GENERAL
=========================================
Output 1: <md>10\. *Effectivity*. This Circular shall take effect fifteen (15) calendar days following its publication either in the Official Gazette or in a newspaper of general circulation.
# Classification: GENERAL
=========================================
Output 2: <md># Section 10. Effectivity. This Circular shall take effect fifteen (15) calendar days following its publication in the Official Gazette or in a newspaper of general circulation.
# 1. The effectivity of this Circular shall be determined by the following