Spaces: Running on Zero
AdrienB134 committed
Commit • 9f28ec7
1 Parent(s): c34d360

fsdv

app.py CHANGED
@@ -22,34 +22,25 @@ import subprocess
 subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
 
 
-## Load idefics
-id_processor = AutoProcessor.from_pretrained("HuggingFaceM4/Idefics3-8B-Llama3")
-
-id_model = Idefics3ForConditionalGeneration.from_pretrained("HuggingFaceM4/Idefics3-8B-Llama3",
-    torch_dtype=torch.bfloat16,
-    #_attn_implementation="flash_attention_2"
-).to("cuda")
-
-BAD_WORDS_IDS = id_processor.tokenizer(["<image>", "<fake_token_around_image>"], add_special_tokens=False).input_ids
-EOS_WORDS_IDS = [id_processor.tokenizer.eos_token_id]
-
-# Load colpali model
-model_name = "vidore/colpali-v1.2"
-token = os.environ.get("HF_TOKEN")
-model = ColPali.from_pretrained(
-    "vidore/colpaligemma-3b-pt-448-base", torch_dtype=torch.bfloat16, device_map="cuda", token = token).eval()
-
-model.load_adapter(model_name)
-model = model.eval()
-processor = AutoProcessor.from_pretrained(model_name, token = token)
-
-mock_image = Image.new("RGB", (448, 448), (255, 255, 255))
+
+
+
 
 @spaces.GPU
 def model_inference(
     images, text, assistant_prefix= None, decoding_strategy = "Greedy", temperature= 0.4, max_new_tokens=512,
     repetition_penalty=1.2, top_p=0.8
 ):
+    ## Load idefics
+    id_processor = AutoProcessor.from_pretrained("HuggingFaceM4/Idefics3-8B-Llama3")
+
+    id_model = Idefics3ForConditionalGeneration.from_pretrained("HuggingFaceM4/Idefics3-8B-Llama3",
+        torch_dtype=torch.bfloat16,
+        #_attn_implementation="flash_attention_2"
+    ).to("cuda")
+
+    BAD_WORDS_IDS = id_processor.tokenizer(["<image>", "<fake_token_around_image>"], add_special_tokens=False).input_ids
+    EOS_WORDS_IDS = [id_processor.tokenizer.eos_token_id]
     print(type(images))
     images = images[0]
     print(type(images))
@@ -111,6 +102,18 @@ def model_inference(
 @spaces.GPU
 def search(query: str, ds, images, k):
 
+    # Load colpali model
+    model_name = "vidore/colpali-v1.2"
+    token = os.environ.get("HF_TOKEN")
+    model = ColPali.from_pretrained(
+        "vidore/colpaligemma-3b-pt-448-base", torch_dtype=torch.bfloat16, device_map="cuda", token = token).eval()
+
+    model.load_adapter(model_name)
+    model = model.eval()
+    processor = AutoProcessor.from_pretrained(model_name, token = token)
+
+    mock_image = Image.new("RGB", (448, 448), (255, 255, 255))
+
     device = "cuda:0" if torch.cuda.is_available() else "cpu"
     if device != model.device:
         model.to(device)
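Both hunks apply the same ZeroGPU pattern: on a Space running on Zero, a GPU is attached only while a function decorated with @spaces.GPU executes, which is presumably why this commit moves model construction and the .to("cuda") calls from module scope into the decorated functions. A minimal sketch of that pattern, using gpt2 as an illustrative stand-in for the Idefics3/ColPali checkpoints above (the generate function and its prompt handling are hypothetical, not from this Space):

import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

@spaces.GPU
def generate(prompt: str) -> str:
    # Model loading and the move to "cuda" happen inside the decorated
    # function, mirroring the commit above; the GPU is guaranteed to be
    # attached for the duration of this call.
    tokenizer = AutoTokenizer.from_pretrained("gpt2")
    model = AutoModelForCausalLM.from_pretrained(
        "gpt2", torch_dtype=torch.bfloat16
    ).to("cuda")
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
    output_ids = model.generate(**inputs, max_new_tokens=64)
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)

The trade-off is visible in the diff itself: model_inference and search now reload their checkpoints on every invocation. A common refinement is to load once and cache (for example in a module-level variable) so that subsequent calls skip the reload.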