John6666 committed on
Commit
f4aa29b
1 Parent(s): dc4889b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +86 -86
app.py CHANGED
@@ -1,87 +1,87 @@
1
import os
import functools

if os.environ.get("SPACES_ZERO_GPU") is not None:
    # SPACES_ZERO_GPU is set: use the real `spaces` package.
    import spaces
else:
    class spaces:
        """Stand-in for the `spaces` module when SPACES_ZERO_GPU is unset.

        Only the `GPU` decorator is provided; it calls the decorated
        function directly.
        """

        @staticmethod
        def GPU(func=None, **_options):
            """Pass-through replacement for the `spaces.GPU` decorator.

            Works in both decorator spellings:

            * ``@spaces.GPU`` - *func* is the decorated function.
            * ``@spaces.GPU(duration=30)`` - *func* is ``None`` and the
              keyword options are accepted and ignored.

            Args:
                func: The function being decorated, or ``None`` when the
                    decorator is called with options first.
                **_options: Ignored keyword options (e.g. ``duration``).

            Returns:
                The wrapped function (bare form) or a decorator that
                produces it (parameterised form).
            """
            def decorate(target):
                # Keep the original name/docstring on the wrapper.
                @functools.wraps(target)
                def wrapper(*args, **kwargs):
                    return target(*args, **kwargs)
                return wrapper

            # Bare form hands us the function; the parameterised form hands
            # us only options and expects a decorator back.
            if func is None:
                return decorate
            return decorate(func)
11
- import gradio as gr
12
- import requests
13
- import torch
14
- from PIL import Image
15
- from transformers import MllamaForConditionalGeneration, AutoProcessor
16
-
17
# Device string chosen from CUDA availability.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Checkpoint to load. Other checkpoint ids kept for reference:
#   "meta-llama/Llama-3.2-11B-Vision-Instruct"
#   "unsloth/Llama-3.2-11B-Vision-Instruct-bnb-4bit"
model_id = "unsloth/Llama-3.2-11B-Vision-Instruct"

# Initial value shown in the "Image URL" textbox.
url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/0052a70beed5bf71b92610a43a52df6d286cd5f3/diffusers/rabbit.jpg"

# Load the model in bfloat16 and let `device_map="auto"` decide placement.
model = MllamaForConditionalGeneration.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
# Processor matching the same checkpoint.
processor = AutoProcessor.from_pretrained(model_id)
25
-
26
@spaces.GPU(duration=30)
def infer(message: str, url: str) -> str:
    """Generate a reply to *message*, optionally about the image at *url*.

    Args:
        message: Prompt text from the user.
        url: Address of an image. It is downloaded only when it starts with
            ``http://`` or ``https://``; any other value (including an
            empty string) produces a text-only prompt.

    Returns:
        The text of ``output[0]`` as decoded by ``processor.decode`` with
        its default options.

    Raises:
        requests.RequestException: If the download fails, times out, or
            returns an error status.
    """
    url = (url or "").strip()

    image = None
    if url.startswith(("http://", "https://")):
        # NOTE(review): the URL comes straight from the UI and is fetched
        # by this process; restrict allowed hosts if that is a concern.
        with requests.get(url, stream=True, timeout=30) as response:
            response.raise_for_status()
            # Have urllib3 undo any Content-Encoding before PIL reads.
            response.raw.decode_content = True
            image = Image.open(response.raw)
            # Force a full read while the connection is still open.
            image.load()

    # Add the image placeholder only when an image is actually supplied, so
    # the prompt and the processor inputs always agree.
    content = [{"type": "text", "text": message}]
    processor_kwargs = {}
    if image is not None:
        content.insert(0, {"type": "image"})
        processor_kwargs["images"] = image
    messages = [{"role": "user", "content": content}]

    input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
    inputs = processor(
        text=input_text,
        add_special_tokens=False,
        return_tensors="pt",
        **processor_kwargs,
    ).to(model.device)

    # Generation is capped at 30 new tokens.
    output = model.generate(**inputs, max_new_tokens=30)
    output_str = processor.decode(output[0])

    # Echo the request and the result to stdout.
    print(message)
    print(url)
    print(output_str)
    return output_str
53
-
54
# UI: two text inputs, a "Run" button, and a Markdown area for the result.
# NOTE(review): nesting is the conventional reading of the layout - confirm
# which components belong inside the Row.
with gr.Blocks() as demo:
    with gr.Row():
        message = gr.Textbox(value="Describe the image.", label="Message", lines=1)
        image_url = gr.Textbox(value=url, label="Image URL", lines=1)
    run_button = gr.Button("Run", variant="primary")
    info_md = gr.Markdown("<br><br><br>")

    # Clicking the button sends both textbox values to `infer` and writes
    # its return value into the Markdown component.
    run_button.click(
        fn=infer,
        inputs=[message, image_url],
        outputs=[info_md],
    )

demo.launch()
64
-
65
- """
66
- Describe the image.
67
- https://huggingface.co/datasets/huggingface/documentation-images/resolve/0052a70beed5bf71b92610a43a52df6d286cd5f3/diffusers/rabbit.jpg
68
- <|begin_of_text|><|start_header_id|>user<|end_header_id|>
69
-
70
- <|image|>Describe the image.<|eot_id|><|start_header_id|>assistant<|end_header_id|>
71
-
72
- This image features a charming anthropomorphic rabbit, attired in a brown waistcoat and tan pants, with a blue coat draped over his shoulders, standing
73
- If I had to write a haiku for this one, it would be:
74
-
75
- <|begin_of_text|><|start_header_id|>user<|end_header_id|>
76
-
77
- <|image|>If I had to write a haiku for this one, it would be: <|eot_id|><|start_header_id|>assistant<|end_header_id|>
78
-
79
- It seems like you started to write a haiku but didn't finish. Would you like to complete it?<|eot_id|>
80
- Who are you?
81
-
82
- <|begin_of_text|><|start_header_id|>user<|end_header_id|>
83
-
84
- <|image|>Who are you?<|eot_id|><|start_header_id|>assistant<|end_header_id|>
85
-
86
- I'm an artificial intelligence model known as Llama. Llama stands for "Large Language Model Meta AI."<|eot_id|>
87
  """
 
1
import os
import functools

if os.environ.get("SPACES_ZERO_GPU") is not None:
    # SPACES_ZERO_GPU is set: use the real `spaces` package.
    import spaces
else:
    class spaces:
        """Stand-in for the `spaces` module when SPACES_ZERO_GPU is unset.

        Only the `GPU` decorator is provided; it calls the decorated
        function directly.
        """

        @staticmethod
        def GPU(func=None, **_options):
            """Pass-through replacement for the `spaces.GPU` decorator.

            Works in both decorator spellings:

            * ``@spaces.GPU`` - *func* is the decorated function.
            * ``@spaces.GPU(duration=30)`` - *func* is ``None`` and the
              keyword options are accepted and ignored.

            Args:
                func: The function being decorated, or ``None`` when the
                    decorator is called with options first.
                **_options: Ignored keyword options (e.g. ``duration``).

            Returns:
                The wrapped function (bare form) or a decorator that
                produces it (parameterised form).
            """
            def decorate(target):
                # Keep the original name/docstring on the wrapper.
                @functools.wraps(target)
                def wrapper(*args, **kwargs):
                    return target(*args, **kwargs)
                return wrapper

            # Bare form hands us the function; the parameterised form hands
            # us only options and expects a decorator back.
            if func is None:
                return decorate
            return decorate(func)
11
+ import gradio as gr
12
+ import requests
13
+ import torch
14
+ from PIL import Image
15
+ from transformers import MllamaForConditionalGeneration, AutoProcessor
16
+
17
# Device string chosen from CUDA availability.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Checkpoint to load. Other checkpoint ids kept for reference:
#   "meta-llama/Llama-3.2-11B-Vision-Instruct"
#   "unsloth/Llama-3.2-11B-Vision-Instruct-bnb-4bit"
model_id = "unsloth/Llama-3.2-11B-Vision-Instruct"

# Initial value shown in the "Image URL" textbox.
url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/0052a70beed5bf71b92610a43a52df6d286cd5f3/diffusers/rabbit.jpg"

# Load the model in bfloat16 and let `device_map="auto"` decide placement.
model = MllamaForConditionalGeneration.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
# Processor matching the same checkpoint.
processor = AutoProcessor.from_pretrained(model_id)
25
+
26
@spaces.GPU
def infer(message: str, url: str) -> str:
    """Generate a reply to *message*, optionally about the image at *url*.

    Args:
        message: Prompt text from the user.
        url: Address of an image. It is downloaded only when it starts with
            ``http://`` or ``https://``; any other value (including an
            empty string) produces a text-only prompt.

    Returns:
        The text of ``output[0]`` as decoded by ``processor.decode`` with
        its default options.

    Raises:
        requests.RequestException: If the download fails, times out, or
            returns an error status.
    """
    url = (url or "").strip()

    image = None
    if url.startswith(("http://", "https://")):
        # NOTE(review): the URL comes straight from the UI and is fetched
        # by this process; restrict allowed hosts if that is a concern.
        with requests.get(url, stream=True, timeout=30) as response:
            response.raise_for_status()
            # Have urllib3 undo any Content-Encoding before PIL reads.
            response.raw.decode_content = True
            image = Image.open(response.raw)
            # Force a full read while the connection is still open.
            image.load()

    # Add the image placeholder only when an image is actually supplied, so
    # the prompt and the processor inputs always agree.
    content = [{"type": "text", "text": message}]
    processor_kwargs = {}
    if image is not None:
        content.insert(0, {"type": "image"})
        processor_kwargs["images"] = image
    messages = [{"role": "user", "content": content}]

    input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
    inputs = processor(
        text=input_text,
        add_special_tokens=False,
        return_tensors="pt",
        **processor_kwargs,
    ).to(model.device)

    # Generation is capped at 30 new tokens.
    output = model.generate(**inputs, max_new_tokens=30)
    output_str = processor.decode(output[0])

    # Echo the request and the result to stdout.
    print(message)
    print(url)
    print(output_str)
    return output_str
53
+
54
# UI: two text inputs, a "Run" button, and a Markdown area for the result.
# NOTE(review): nesting is the conventional reading of the layout - confirm
# which components belong inside the Row.
with gr.Blocks() as demo:
    with gr.Row():
        message = gr.Textbox(value="Describe the image.", label="Message", lines=1)
        image_url = gr.Textbox(value=url, label="Image URL", lines=1)
    run_button = gr.Button("Run", variant="primary")
    info_md = gr.Markdown("<br><br><br>")

    # Clicking the button sends both textbox values to `infer` and writes
    # its return value into the Markdown component.
    run_button.click(
        fn=infer,
        inputs=[message, image_url],
        outputs=[info_md],
    )

demo.launch()
64
+
65
+ """
66
+ Describe the image.
67
+ https://huggingface.co/datasets/huggingface/documentation-images/resolve/0052a70beed5bf71b92610a43a52df6d286cd5f3/diffusers/rabbit.jpg
68
+ <|begin_of_text|><|start_header_id|>user<|end_header_id|>
69
+
70
+ <|image|>Describe the image.<|eot_id|><|start_header_id|>assistant<|end_header_id|>
71
+
72
+ This image features a charming anthropomorphic rabbit, attired in a brown waistcoat and tan pants, with a blue coat draped over his shoulders, standing
73
+ If I had to write a haiku for this one, it would be:
74
+
75
+ <|begin_of_text|><|start_header_id|>user<|end_header_id|>
76
+
77
+ <|image|>If I had to write a haiku for this one, it would be: <|eot_id|><|start_header_id|>assistant<|end_header_id|>
78
+
79
+ It seems like you started to write a haiku but didn't finish. Would you like to complete it?<|eot_id|>
80
+ Who are you?
81
+
82
+ <|begin_of_text|><|start_header_id|>user<|end_header_id|>
83
+
84
+ <|image|>Who are you?<|eot_id|><|start_header_id|>assistant<|end_header_id|>
85
+
86
+ I'm an artificial intelligence model known as Llama. Llama stands for "Large Language Model Meta AI."<|eot_id|>
87
  """