Spaces: Running on Zero

Update app.py

app.py CHANGED
@@ -3,30 +3,35 @@ from transformers import AutoModelForCausalLM, AutoProcessor, GenerationConfig
 from PIL import Image
 import requests
 from io import BytesIO
+import spaces  # Import spaces for ZeroGPU support
 
 # Load the model and processor
 repo_name = "cyan2k/molmo-7B-O-bnb-4bit"
 arguments = {
-    "device_map": "auto", #
-    "torch_dtype": "auto", #
-    "trust_remote_code": True # Allow
+    "device_map": "auto",  # Device will be set automatically
+    "torch_dtype": "auto",  # Use appropriate precision
+    "trust_remote_code": True  # Allow loading remote code
 }
 
-# Load the processor
+# Load the processor (this part doesn't need GPU yet)
 processor = AutoProcessor.from_pretrained(repo_name, **arguments)
-model = AutoModelForCausalLM.from_pretrained(repo_name, **arguments)
 
+# Define the function for image description
+@spaces.GPU  # This ensures the function gets GPU access when needed
 def describe_image(image):
+    # Load the model inside the function and move it to GPU
+    model = AutoModelForCausalLM.from_pretrained(repo_name, **arguments).to('cuda')
+
     # Process the uploaded image
     inputs = processor.process(
         images=[image],
         text="Describe this image in great detail without missing any piece of information"
     )
 
-    # Move inputs to model device
-    inputs = {k: v.to(
+    # Move inputs to model device (GPU)
+    inputs = {k: v.to('cuda').unsqueeze(0) for k, v in inputs.items()}
 
-    # Generate output
+    # Generate output using the model on GPU
     output = model.generate_from_batch(
         inputs,
         GenerationConfig(max_new_tokens=1024, stop_strings="<|endoftext|>"),
@@ -39,7 +44,7 @@ def describe_image(image):
 
     return generated_text
 
-
+# Gradio interface
 def gradio_app():
     # Define Gradio interface
     image_input = gr.Image(type="pil", label="Upload Image")
@@ -58,4 +63,4 @@ def gradio_app():
     interface.launch()
 
 # Launch the Gradio app
-gradio_app()
+gradio_app()
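For context, the change follows the usual Hugging Face ZeroGPU pattern: import the spaces package, keep CPU-only setup at module level, and put the GPU-dependent work inside a function decorated with @spaces.GPU so a GPU is attached only while that function runs. A minimal, stripped-down sketch of that pattern (not this Space's actual code; the model and prompt below are placeholders) looks like this:

import gradio as gr
import spaces  # available on ZeroGPU Spaces
from transformers import AutoModelForCausalLM, AutoTokenizer

# CPU-only setup runs at startup, before any GPU is attached.
model_id = "gpt2"  # placeholder model for illustration
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

@spaces.GPU  # a GPU is allocated only for calls into this function
def generate(prompt: str) -> str:
    model.to("cuda")  # CUDA is available inside the decorated call
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
    output = model.generate(**inputs, max_new_tokens=64)
    return tokenizer.decode(output[0], skip_special_tokens=True)

gr.Interface(fn=generate, inputs="text", outputs="text").launch()

Loading or moving the model inside the decorated function, as the commit does, keeps the Space's startup on CPU and defers all CUDA work to the short-lived GPU allocation.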