Spaces: Running on Zero

Update app.py

app.py CHANGED
@@ -3,30 +3,35 @@ from transformers import AutoModelForCausalLM, AutoProcessor, GenerationConfig
 from PIL import Image
 import requests
 from io import BytesIO
+import spaces  # Import spaces for ZeroGPU support
 
 # Load the model and processor
 repo_name = "cyan2k/molmo-7B-O-bnb-4bit"
 arguments = {
-    "device_map": "auto", #
-    "torch_dtype": "auto", #
-    "trust_remote_code": True # Allow
+    "device_map": "auto",  # Device will be set automatically
+    "torch_dtype": "auto",  # Use appropriate precision
+    "trust_remote_code": True  # Allow loading remote code
 }
 
-# Load the processor
+# Load the processor (this part doesn't need GPU yet)
 processor = AutoProcessor.from_pretrained(repo_name, **arguments)
-model = AutoModelForCausalLM.from_pretrained(repo_name, **arguments)
 
+# Define the function for image description
+@spaces.GPU  # This ensures the function gets GPU access when needed
 def describe_image(image):
+    # Load the model inside the function and move it to GPU
+    model = AutoModelForCausalLM.from_pretrained(repo_name, **arguments).to('cuda')
+
     # Process the uploaded image
     inputs = processor.process(
         images=[image],
         text="Describe this image in great detail without missing any piece of information"
     )
 
-    # Move inputs to model device
-    inputs = {k: v.to(
+    # Move inputs to model device (GPU)
+    inputs = {k: v.to('cuda').unsqueeze(0) for k, v in inputs.items()}
 
-    # Generate output
+    # Generate output using the model on GPU
     output = model.generate_from_batch(
         inputs,
         GenerationConfig(max_new_tokens=1024, stop_strings="<|endoftext|>"),
@@ -39,7 +44,7 @@ def describe_image(image):
 
     return generated_text
 
-
+# Gradio interface
 def gradio_app():
     # Define Gradio interface
     image_input = gr.Image(type="pil", label="Upload Image")
@@ -58,4 +63,4 @@ def gradio_app():
     interface.launch()
 
 # Launch the Gradio app
-gradio_app()
+gradio_app()
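For context, the change follows the usual Hugging Face ZeroGPU pattern: import the spaces package, keep CPU-only setup at module level, and put the GPU-dependent work inside a function decorated with @spaces.GPU so a GPU is attached only while that function runs. A minimal, stripped-down sketch of that pattern (not this Space's actual code; the model and prompt below are placeholders) looks like this:

import gradio as gr
import spaces  # available on ZeroGPU Spaces
from transformers import AutoModelForCausalLM, AutoTokenizer

# CPU-only setup runs at startup, before any GPU is attached.
model_id = "gpt2"  # placeholder model for illustration
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

@spaces.GPU  # a GPU is allocated only for calls into this function
def generate(prompt: str) -> str:
    model.to("cuda")  # CUDA is available inside the decorated call
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
    output = model.generate(**inputs, max_new_tokens=64)
    return tokenizer.decode(output[0], skip_special_tokens=True)

gr.Interface(fn=generate, inputs="text", outputs="text").launch()

Loading or moving the model inside the decorated function, as the commit does, keeps the Space's startup on CPU and defers all CUDA work to the short-lived GPU allocation.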