John6666 committed on
Commit
dc4889b
•
1 Parent(s): f522a09

Upload 3 files

Browse files
Files changed (3) hide show
  1. README.md +13 -12
  2. app.py +87 -0
  3. requirements.txt +7 -0
README.md CHANGED
@@ -1,12 +1,13 @@
1
- ---
2
- title: Llama Test
3
- emoji: 🐠
4
- colorFrom: indigo
5
- colorTo: red
6
- sdk: gradio
7
- sdk_version: 5.5.0
8
- app_file: app.py
9
- pinned: false
10
- ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
1
+ ---
2
+ title: test
3
+ emoji: 🙄
4
+ colorFrom: indigo
5
+ colorTo: purple
6
+ sdk: gradio
7
+ sdk_version: 4.44.0
8
+ app_file: app.py
9
+ pinned: false
10
+ license: mit
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
if os.environ.get("SPACES_ZERO_GPU") is not None:
    import spaces
else:
    import functools

    class spaces:
        """Local stand-in for the ``spaces`` package.

        Defined only when the ``SPACES_ZERO_GPU`` environment variable is
        unset, so that ``spaces.GPU`` decorators in this file become no-ops.
        """

        @staticmethod
        def GPU(func=None, **_options):
            """No-op replacement for the ``spaces.GPU`` decorator.

            Supports both usages found in application code:

            * bare decorator: ``@spaces.GPU``
            * decorator factory: ``@spaces.GPU(duration=30)``

            Args:
                func: The function to wrap when used as a bare decorator;
                    ``None`` when called with keyword options only.
                **_options: Options such as ``duration``; accepted and
                    ignored by this fallback.

            Returns:
                The wrapped function, or a decorator producing one when
                ``func`` is ``None``.
            """

            def decorate(target):
                # Preserve the wrapped function's name and docstring.
                @functools.wraps(target)
                def wrapper(*args, **kwargs):
                    return target(*args, **kwargs)

                return wrapper

            # Called as ``GPU(duration=...)``: hand back the decorator itself.
            if func is None:
                return decorate
            return decorate(func)
11
+ import gradio as gr
12
+ import requests
13
+ import torch
14
+ from PIL import Image
15
+ from transformers import MllamaForConditionalGeneration, AutoProcessor
16
+
17
# Device label derived from CUDA availability.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Checkpoint to load; the alternatives are kept for reference.
#model_id = "meta-llama/Llama-3.2-11B-Vision-Instruct"
model_id = "unsloth/Llama-3.2-11B-Vision-Instruct"
#model_id = "unsloth/Llama-3.2-11B-Vision-Instruct-bnb-4bit"

# Sample image address.
url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/0052a70beed5bf71b92610a43a52df6d286cd5f3/diffusers/rabbit.jpg"

# Load the model in bfloat16 and let device_map="auto" decide placement,
# then load the processor from the same checkpoint.
model = MllamaForConditionalGeneration.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
processor = AutoProcessor.from_pretrained(model_id)
25
+
26
@spaces.GPU(duration=30)
def infer(message: str, url: str):
    """Generate a model reply to ``message``, optionally about an image.

    Args:
        message: The user's prompt text.
        url: An ``http://`` or ``https://`` address of an image to attach.
            Any other value (empty string, non-URL text) runs the request
            as text-only.

    Returns:
        The decoded model output as a string. The full generated sequence is
        decoded, so the prompt and the chat special tokens are included.

    Raises:
        requests.RequestException: If the image download fails, times out,
            or returns an error status.
    """
    import io

    # Download the image only for real HTTP(S) URLs.
    image = None
    image_url = url.strip() if url else ""
    if image_url.lower().startswith(("http://", "https://")):
        # A timeout keeps a stalled download from hanging the request.
        response = requests.get(image_url, timeout=30)
        response.raise_for_status()
        # ``response.content`` is fully read and content-decoded, unlike ``.raw``.
        image = Image.open(io.BytesIO(response.content))

    # Add the image placeholder only when an image is actually supplied, so
    # the prompt never contains an image token without matching image data.
    content = [{"type": "text", "text": message}]
    processor_kwargs = {}
    if image is not None:
        content.insert(0, {"type": "image"})
        processor_kwargs["images"] = image
    messages = [{"role": "user", "content": content}]

    input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
    # The chat template already adds the special tokens, hence
    # add_special_tokens=False here.
    inputs = processor(
        text=input_text,
        add_special_tokens=False,
        return_tensors="pt",
        **processor_kwargs,
    ).to(model.device)

    output = model.generate(**inputs, max_new_tokens=30)
    output_str = processor.decode(output[0])

    # Echo the request and the result to the log.
    print(message)
    print(url)
    print(output_str)
    return output_str
53
+
54
# Build the demo UI: two text inputs, a run button and a Markdown output area.
# NOTE(review): the original indentation was lost in this diff dump; the
# layout below (only the two textboxes inside the Row) is assumed — confirm.
with gr.Blocks() as demo:
    with gr.Row():
        message = gr.Textbox(label="Message", value="Describe the image.", lines=1)
        image_url = gr.Textbox(label="Image URL", value=url, lines=1)
    run_button = gr.Button("Run", variant="primary")
    info_md = gr.Markdown("<br><br><br>")

    # Clicking "Run" sends both textbox values to infer() and renders the
    # returned string in the Markdown component.
    run_button.click(
        fn=infer,
        inputs=[message, image_url],
        outputs=[info_md],
    )

demo.launch()
64
+
65
+ """
66
+ Describe the image.
67
+ https://huggingface.co/datasets/huggingface/documentation-images/resolve/0052a70beed5bf71b92610a43a52df6d286cd5f3/diffusers/rabbit.jpg
68
+ <|begin_of_text|><|start_header_id|>user<|end_header_id|>
69
+
70
+ <|image|>Describe the image.<|eot_id|><|start_header_id|>assistant<|end_header_id|>
71
+
72
+ This image features a charming anthropomorphic rabbit, attired in a brown waistcoat and tan pants, with a blue coat draped over his shoulders, standing
73
+ If I had to write a haiku for this one, it would be:
74
+
75
+ <|begin_of_text|><|start_header_id|>user<|end_header_id|>
76
+
77
+ <|image|>If I had to write a haiku for this one, it would be: <|eot_id|><|start_header_id|>assistant<|end_header_id|>
78
+
79
+ It seems like you started to write a haiku but didn't finish. Would you like to complete it?<|eot_id|>
80
+ Who are you?
81
+
82
+ <|begin_of_text|><|start_header_id|>user<|end_header_id|>
83
+
84
+ <|image|>Who are you?<|eot_id|><|start_header_id|>assistant<|end_header_id|>
85
+
86
+ I'm an artificial intelligence model known as Llama. Llama stands for "Large Language Model Meta AI."<|eot_id|>
87
+ """
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ huggingface_hub==0.26.1
2
+ torch
3
+ transformers==4.45.0
4
+ bitsandbytes
5
+ accelerate==1.0.1
6
+ numpy==1.26.4
7
+ datasets==3.0.2