---
library_name: transformers
tags: []
---

# Usage Example

The example below loads the model with 4-bit quantization via `bitsandbytes` and generates a description of an image fetched from a URL. It requires `transformers`, `accelerate`, `bitsandbytes`, `Pillow`, and `requests`.

```python
import requests
from PIL import Image

from transformers import AutoProcessor, BitsAndBytesConfig, MllamaForConditionalGeneration


def get_image_description(model, processor, image, initial_prompt="", max_new_tokens=70):
    """Generate a text description of `image` with the vision-language model."""
    if not initial_prompt:
        initial_prompt = "How would you describe the contents of this photo?"
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image"},
                {"type": "text", "text": initial_prompt},
            ],
        }
    ]
    # Render the chat template, then tokenize the image/text pair together.
    input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
    inputs = processor(
        image,
        input_text,
        add_special_tokens=False,
        return_tensors="pt",
    ).to(model.device)
    output = model.generate(**inputs, max_new_tokens=max_new_tokens)
    return processor.decode(output[0])


def load_model(model_id="belkhir-nacim/l32vision_instruct"):
    """Load the model in 4-bit precision along with its processor."""
    bnb_config = BitsAndBytesConfig(load_in_4bit=True)  # enable 4-bit quantization
    model = MllamaForConditionalGeneration.from_pretrained(
        model_id,
        device_map="auto",
        quantization_config=bnb_config,
    )
    processor = AutoProcessor.from_pretrained(model_id)
    return model, processor


model, processor = load_model()

url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/0052a70beed5bf71b92610a43a52df6d286cd5f3/diffusers/rabbit.jpg"
image = Image.open(requests.get(url, stream=True).raw)

result = get_image_description(
    model,
    processor,
    image,
    initial_prompt="Tell me what you see in the image. Use keywords to describe it.",
)
print(result)
```
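Note that `processor.decode(output[0])` returns the full sequence, including the rendered prompt and special tokens. If you only want the generated answer, a minimal sketch of an alternative ending for `get_image_description` (reusing the same `inputs` and `output` variables from the function above) could look like:

```python
# Decode only the newly generated tokens, dropping the prompt and special
# tokens. Assumes the `inputs` and `output` variables defined inside
# get_image_description above.
prompt_len = inputs["input_ids"].shape[-1]
print(processor.decode(output[0][prompt_len:], skip_special_tokens=True))
```

This works because `generate` returns the prompt tokens followed by the new tokens, so slicing past the prompt length leaves only the model's answer.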