AdrienB134 committed on
Commit
9f28ec7
1 Parent(s): c34d360
Files changed (1) hide show
  1. app.py +22 -19
app.py CHANGED
@@ -22,34 +22,25 @@ import subprocess
22
  subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
23
 
24
 
25
- ## Load idefics
26
- id_processor = AutoProcessor.from_pretrained("HuggingFaceM4/Idefics3-8B-Llama3")
27
 
28
- id_model = Idefics3ForConditionalGeneration.from_pretrained("HuggingFaceM4/Idefics3-8B-Llama3",
29
- torch_dtype=torch.bfloat16,
30
- #_attn_implementation="flash_attention_2"
31
- ).to("cuda")
32
 
33
- BAD_WORDS_IDS = id_processor.tokenizer(["<image>", "<fake_token_around_image>"], add_special_tokens=False).input_ids
34
- EOS_WORDS_IDS = [id_processor.tokenizer.eos_token_id]
35
 
36
- # Load colpali model
37
- model_name = "vidore/colpali-v1.2"
38
- token = os.environ.get("HF_TOKEN")
39
- model = ColPali.from_pretrained(
40
- "vidore/colpaligemma-3b-pt-448-base", torch_dtype=torch.bfloat16, device_map="cuda", token = token).eval()
41
-
42
- model.load_adapter(model_name)
43
- model = model.eval()
44
- processor = AutoProcessor.from_pretrained(model_name, token = token)
45
-
46
- mock_image = Image.new("RGB", (448, 448), (255, 255, 255))
47
 
48
  @spaces.GPU
49
  def model_inference(
50
  images, text, assistant_prefix= None, decoding_strategy = "Greedy", temperature= 0.4, max_new_tokens=512,
51
  repetition_penalty=1.2, top_p=0.8
52
  ):
 
 
 
 
 
 
 
 
 
 
53
  print(type(images))
54
  images = images[0]
55
  print(type(images))
@@ -111,6 +102,18 @@ def model_inference(
111
  @spaces.GPU
112
  def search(query: str, ds, images, k):
113
 
 
 
 
 
 
 
 
 
 
 
 
 
114
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
115
  if device != model.device:
116
  model.to(device)
 
22
  subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
23
 
24
 
 
 
25
 
 
 
 
 
26
 
 
 
27
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
  @spaces.GPU
30
  def model_inference(
31
  images, text, assistant_prefix= None, decoding_strategy = "Greedy", temperature= 0.4, max_new_tokens=512,
32
  repetition_penalty=1.2, top_p=0.8
33
  ):
34
+ ## Load idefics
35
+ id_processor = AutoProcessor.from_pretrained("HuggingFaceM4/Idefics3-8B-Llama3")
36
+
37
+ id_model = Idefics3ForConditionalGeneration.from_pretrained("HuggingFaceM4/Idefics3-8B-Llama3",
38
+ torch_dtype=torch.bfloat16,
39
+ #_attn_implementation="flash_attention_2"
40
+ ).to("cuda")
41
+
42
+ BAD_WORDS_IDS = id_processor.tokenizer(["<image>", "<fake_token_around_image>"], add_special_tokens=False).input_ids
43
+ EOS_WORDS_IDS = [id_processor.tokenizer.eos_token_id]
44
  print(type(images))
45
  images = images[0]
46
  print(type(images))
 
102
  @spaces.GPU
103
  def search(query: str, ds, images, k):
104
 
105
+ # Load colpali model
106
+ model_name = "vidore/colpali-v1.2"
107
+ token = os.environ.get("HF_TOKEN")
108
+ model = ColPali.from_pretrained(
109
+ "vidore/colpaligemma-3b-pt-448-base", torch_dtype=torch.bfloat16, device_map="cuda", token = token).eval()
110
+
111
+ model.load_adapter(model_name)
112
+ model = model.eval()
113
+ processor = AutoProcessor.from_pretrained(model_name, token = token)
114
+
115
+ mock_image = Image.new("RGB", (448, 448), (255, 255, 255))
116
+
117
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
118
  if device != model.device:
119
  model.to(device)