zqu2004 committed on
Commit
e8777df
1 Parent(s): 602c211

Update app.py

Browse files

use q4 version

Files changed (1) hide show
  1. app.py +7 -17
app.py CHANGED
@@ -4,25 +4,14 @@ from PIL import Image
4
  import torch
5
  import spaces
6
 
7
- # Flag to use GPU (set to False by default)
8
- USE_GPU = False
 
9
 
10
  # Load the processor and model
11
- device = torch.device("cuda" if USE_GPU and torch.cuda.is_available() else "cpu")
12
 
13
- processor = AutoProcessor.from_pretrained(
14
- 'allenai/Molmo-7B-D-0924',
15
- trust_remote_code=True,
16
- torch_dtype='auto',
17
- )
18
-
19
- model = AutoModelForCausalLM.from_pretrained(
20
- 'allenai/Molmo-7B-D-0924',
21
- trust_remote_code=True,
22
- torch_dtype='auto',
23
- )
24
-
25
- model.to(device)
26
 
27
  # Predefined prompts
28
  prompts = [
@@ -41,7 +30,7 @@ def process_image_and_text(image, text, max_new_tokens, temperature, top_p):
41
  )
42
 
43
  # Move inputs to the correct device and make a batch of size 1
44
- inputs = {k: v.to(device).unsqueeze(0) for k, v in inputs.items()}
45
 
46
  # Generate output
47
  output = model.generate_from_batch(
@@ -60,6 +49,7 @@ def process_image_and_text(image, text, max_new_tokens, temperature, top_p):
60
  generated_text = processor.tokenizer.decode(generated_tokens, skip_special_tokens=True)
61
 
62
  return generated_text
 
63
 
64
  def chatbot(image, text, history, max_new_tokens, temperature, top_p):
65
  if image is None:
 
4
  import torch
5
  import spaces
6
 
7
+ # Model name and arguments
8
+ repo_name = "cyan2k/molmo-7B-D-bnb-4bit"
9
+ arguments = {"device_map": "auto", "torch_dtype": "auto", "trust_remote_code": True}
10
 
11
  # Load the processor and model
12
+ processor = AutoProcessor.from_pretrained(repo_name, **arguments)
13
 
14
+ model = AutoModelForCausalLM.from_pretrained(repo_name, **arguments)
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
  # Predefined prompts
17
  prompts = [
 
30
  )
31
 
32
  # Move inputs to the correct device and make a batch of size 1
33
+ inputs = {k: v.to(model.device).unsqueeze(0) for k, v in inputs.items()}
34
 
35
  # Generate output
36
  output = model.generate_from_batch(
 
49
  generated_text = processor.tokenizer.decode(generated_tokens, skip_special_tokens=True)
50
 
51
  return generated_text
52
+
53
 
54
  def chatbot(image, text, history, max_new_tokens, temperature, top_p):
55
  if image is None: