|
import torch |
|
from transformers import AutoModelForCausalLM, AutoTokenizer |
|
from PIL import Image |
|
import requests |
|
from io import BytesIO |
|
|
|
def _load_image(url: str) -> Image.Image:
    """Download *url* and return it decoded as a PIL image.

    Raises requests.HTTPError on a non-2xx response so a server error page
    fails loudly here instead of as a confusing UnidentifiedImageError in
    Image.open, and requests.Timeout if the server does not respond.
    """
    response = requests.get(url, timeout=30)  # never hang indefinitely
    response.raise_for_status()
    return Image.open(BytesIO(response.content))


def main() -> None:
    """Caption a sample product image with qresearch/llama-3.1-8B-vision-378."""
    url = "https://d2h50zujfkj84t.cloudfront.net/product_images/Screenshot_2024-09-03_135657.png"
    image = _load_image(url)

    # float16 kernels are unsupported or extremely slow on CPU — use half
    # precision only when a CUDA device is available, float32 otherwise.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    dtype = torch.float16 if device == "cuda" else torch.float32

    model_id = "qresearch/llama-3.1-8B-vision-378"
    # trust_remote_code=True: this checkpoint ships its own modeling code
    # (the answer_question method below comes from it) — acceptable only
    # because the source is explicitly chosen here.
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        trust_remote_code=True,
        torch_dtype=dtype,
    ).to(device)

    tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)

    # answer_question is defined by the checkpoint's remote code; presumably
    # it runs generate() under the hood — sampling at temperature 0.3 keeps
    # the description close to greedy while allowing mild variation.
    print(
        model.answer_question(
            image,
            "Briefly describe the image",
            tokenizer,
            max_new_tokens=128,
            do_sample=True,
            temperature=0.3,
        )
    )


if __name__ == "__main__":
    main()
|
|
|
|
|
|
|
|