# PG-InstructBLIP / test.py
# Author: Bidipta Sarkar
# Correct output from test.py (commit f536ed7)
import torch
from PIL import Image
from omegaconf import OmegaConf
from lavis.models import load_model, load_preprocess
from lavis.common.registry import registry
import requests
from generate import generate
# Fetch the example image used throughout this demo script.
url = "https://iliad.stanford.edu/pg-vlm/example_images/ceramic_bowl.jpg"
response = requests.get(url, stream=True)
# Fail fast with a clear HTTP error instead of letting PIL choke on an
# error page / truncated stream with an opaque UnidentifiedImageError.
response.raise_for_status()
example_image = Image.open(response.raw).convert("RGB")
# Load the PG-InstructBLIP model (InstructBLIP architecture, Flan-T5-XXL LLM)
# in eval mode, on GPU when one is available.
device = "cuda" if torch.cuda.is_available() else "cpu"
vlm = load_model(
    name='blip2_t5_instruct',
    model_type='flant5xxl',
    checkpoint='pgvlm_weights.bin',  # replace with location of downloaded weights
    is_eval=True,
    device=device,
)
# Optionally disable the Q-Former's text-input branch.
vlm.qformer_text_input = False
# Build the image preprocessing pipeline from the model's default config
# and keep the eval-time visual processor.
model_cls = registry.get_model_class('blip2_t5_instruct')
config_path = model_cls.default_config_path('flant5xxl')
vis_processors, _ = load_preprocess(OmegaConf.load(config_path).preprocess)
processor = vis_processors["eval"]
# Preprocess the image and add a leading batch dimension of 1
# (equivalent to stacking a single-element list along dim 0).
image_batch = processor(example_image).unsqueeze(0).to(vlm.device)
question_samples = {
    'prompt': 'Question: Classify this object as transparent, translucent, or opaque? Respond unknown if you are not sure. Short answer:',
    'image': image_batch,
}
# Sample three candidate answers with their log-probability scores.
answers, scores = generate(
    vlm,
    question_samples,
    length_penalty=0,
    repetition_penalty=1,
    num_captions=3,
)
print(answers, scores)
# ['opaque', 'translucent', 'transparent'] tensor([-0.0373, -4.2404, -4.4436], device='cuda:0')