zqu2004 committed on
Commit
e8777df
1 Parent(s): 602c211

Update app.py

Browse files

use q4 version

Files changed (1) hide show
  1. app.py +7 -17
app.py CHANGED
@@ -4,25 +4,14 @@ from PIL import Image
4
  import torch
5
  import spaces
6
 
7
- # Flag to use GPU (set to False by default)
8
- USE_GPU = False
 
9
 
10
  # Load the processor and model
11
- device = torch.device("cuda" if USE_GPU and torch.cuda.is_available() else "cpu")
12
 
13
- processor = AutoProcessor.from_pretrained(
14
- 'allenai/Molmo-7B-D-0924',
15
- trust_remote_code=True,
16
- torch_dtype='auto',
17
- )
18
-
19
- model = AutoModelForCausalLM.from_pretrained(
20
- 'allenai/Molmo-7B-D-0924',
21
- trust_remote_code=True,
22
- torch_dtype='auto',
23
- )
24
-
25
- model.to(device)
26
 
27
  # Predefined prompts
28
  prompts = [
@@ -41,7 +30,7 @@ def process_image_and_text(image, text, max_new_tokens, temperature, top_p):
41
  )
42
 
43
  # Move inputs to the correct device and make a batch of size 1
44
- inputs = {k: v.to(device).unsqueeze(0) for k, v in inputs.items()}
45
 
46
  # Generate output
47
  output = model.generate_from_batch(
@@ -60,6 +49,7 @@ def process_image_and_text(image, text, max_new_tokens, temperature, top_p):
60
  generated_text = processor.tokenizer.decode(generated_tokens, skip_special_tokens=True)
61
 
62
  return generated_text
 
63
 
64
  def chatbot(image, text, history, max_new_tokens, temperature, top_p):
65
  if image is None:
 
4
  import torch
5
  import spaces
6
 
7
+ # Model name and arguments
8
+ repo_name = "cyan2k/molmo-7B-D-bnb-4bit"
9
+ arguments = {"device_map": "auto", "torch_dtype": "auto", "trust_remote_code": True}
10
 
11
  # Load the processor and model
12
+ processor = AutoProcessor.from_pretrained(repo_name, **arguments)
13
 
14
+ model = AutoModelForCausalLM.from_pretrained(repo_name, **arguments)
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
  # Predefined prompts
17
  prompts = [
 
30
  )
31
 
32
  # Move inputs to the correct device and make a batch of size 1
33
+ inputs = {k: v.to(model.device).unsqueeze(0) for k, v in inputs.items()}
34
 
35
  # Generate output
36
  output = model.generate_from_batch(
 
49
  generated_text = processor.tokenizer.decode(generated_tokens, skip_special_tokens=True)
50
 
51
  return generated_text
52
+
53
 
54
  def chatbot(image, text, history, max_new_tokens, temperature, top_p):
55
  if image is None: