---
license: gemma
---

GGUF files for Google's PaliGemma 3B mix 224 model. Support is still in development, so you'll need to use these files with https://github.com/ggerganov/llama.cpp/pull/7553 or https://github.com/abetlen/llama-cpp-python/pull/1777.
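If you just want the GGUF files on disk, for example to use them with a llama.cpp build from the PR above, you can fetch them with `huggingface_hub`. This is a minimal sketch, assuming `huggingface_hub` is installed; it reuses the filename patterns from the usage example below, and the `local_dir` name is only an illustration:

```python
# Sketch: download the text-model and mmproj GGUFs from this repo.
# The glob patterns mirror the usage example below; "paligemma-gguf" is
# an arbitrary local directory chosen for illustration.
from huggingface_hub import snapshot_download

path = snapshot_download(
    repo_id="abetlen/paligemma-3b-mix-224-gguf",
    allow_patterns=["*text-model-q4_k_m.gguf", "*mmproj*"],
    local_dir="paligemma-gguf",
)
print(path)  # directory containing the downloaded GGUF files
```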

# Installation

```bash
pip install git+https://github.com/abetlen/llama-cpp-python.git@add-paligemma-support
```
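To confirm the branch installed correctly, a quick sanity check is to make sure the chat handler used in the example below can be imported:

```python
# Sanity check: both imports should succeed on the add-paligemma-support branch.
import llama_cpp
from llama_cpp.llama_chat_format import PaliGemmaChatHandler

print(llama_cpp.__version__)
```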

# Usage

```python
from llama_cpp import Llama
from llama_cpp.llama_chat_format import PaliGemmaChatHandler

# The mmproj file contains the vision encoder/projector used to embed images.
# `filename` accepts a glob pattern that is matched against the files in the repo.
chat_handler = PaliGemmaChatHandler.from_pretrained(
    repo_id="abetlen/paligemma-3b-mix-224-gguf",
    filename="*mmproj*",
)

llm = Llama.from_pretrained(
    repo_id="abetlen/paligemma-3b-mix-224-gguf",
    filename="*text-model-q4_k_m.gguf",
    chat_handler=chat_handler,
    n_gpu_layers=-1,
    n_ctx=2048,    # n_ctx should be increased to accommodate the image embedding
    n_ubatch=512,  # must be large enough to fit image embeddings and text input in a single batch
    n_batch=512,
)

response = llm.create_chat_completion(
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "What's in this image?"},
                {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"}},
            ],
        }
    ]
)
```
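`create_chat_completion` returns an OpenAI-style completion dictionary, so the generated answer can be read from the first choice:

```python
# The non-streaming call returns an OpenAI-style dict; the generated text
# lives at choices[0].message.content.
print(response["choices"][0]["message"]["content"])
```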