Spaces: Running on Zero
AdrienB134 committed
Commit • 9f28ec7
1 Parent(s): c34d360

fsdv

app.py CHANGED
@@ -22,34 +22,25 @@ import subprocess
 subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
 
 
-## Load idefics
-id_processor = AutoProcessor.from_pretrained("HuggingFaceM4/Idefics3-8B-Llama3")
-
-id_model = Idefics3ForConditionalGeneration.from_pretrained("HuggingFaceM4/Idefics3-8B-Llama3",
-    torch_dtype=torch.bfloat16,
-    #_attn_implementation="flash_attention_2"
-).to("cuda")
-
-BAD_WORDS_IDS = id_processor.tokenizer(["<image>", "<fake_token_around_image>"], add_special_tokens=False).input_ids
-EOS_WORDS_IDS = [id_processor.tokenizer.eos_token_id]
-
-# Load colpali model
-model_name = "vidore/colpali-v1.2"
-token = os.environ.get("HF_TOKEN")
-model = ColPali.from_pretrained(
-    "vidore/colpaligemma-3b-pt-448-base", torch_dtype=torch.bfloat16, device_map="cuda", token = token).eval()
-
-model.load_adapter(model_name)
-model = model.eval()
-processor = AutoProcessor.from_pretrained(model_name, token = token)
-
-mock_image = Image.new("RGB", (448, 448), (255, 255, 255))
+
+
+
 
 @spaces.GPU
 def model_inference(
     images, text, assistant_prefix= None, decoding_strategy = "Greedy", temperature= 0.4, max_new_tokens=512,
     repetition_penalty=1.2, top_p=0.8
 ):
+    ## Load idefics
+    id_processor = AutoProcessor.from_pretrained("HuggingFaceM4/Idefics3-8B-Llama3")
+
+    id_model = Idefics3ForConditionalGeneration.from_pretrained("HuggingFaceM4/Idefics3-8B-Llama3",
+        torch_dtype=torch.bfloat16,
+        #_attn_implementation="flash_attention_2"
+    ).to("cuda")
+
+    BAD_WORDS_IDS = id_processor.tokenizer(["<image>", "<fake_token_around_image>"], add_special_tokens=False).input_ids
+    EOS_WORDS_IDS = [id_processor.tokenizer.eos_token_id]
     print(type(images))
     images = images[0]
     print(type(images))
@@ -111,6 +102,18 @@ def model_inference(
 @spaces.GPU
 def search(query: str, ds, images, k):
 
+    # Load colpali model
+    model_name = "vidore/colpali-v1.2"
+    token = os.environ.get("HF_TOKEN")
+    model = ColPali.from_pretrained(
+        "vidore/colpaligemma-3b-pt-448-base", torch_dtype=torch.bfloat16, device_map="cuda", token = token).eval()
+
+    model.load_adapter(model_name)
+    model = model.eval()
+    processor = AutoProcessor.from_pretrained(model_name, token = token)
+
+    mock_image = Image.new("RGB", (448, 448), (255, 255, 255))
+
     device = "cuda:0" if torch.cuda.is_available() else "cpu"
     if device != model.device:
         model.to(device)
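Both hunks apply the same ZeroGPU pattern: on a Space running on Zero, a GPU is attached only while a function decorated with @spaces.GPU executes, which is presumably why this commit moves model construction and the .to("cuda") calls from module scope into the decorated functions. A minimal sketch of that pattern, using gpt2 as an illustrative stand-in for the Idefics3/ColPali checkpoints above (the generate function and its prompt handling are hypothetical, not from this Space):

import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

@spaces.GPU
def generate(prompt: str) -> str:
    # Model loading and the move to "cuda" happen inside the decorated
    # function, mirroring the commit above; the GPU is guaranteed to be
    # attached for the duration of this call.
    tokenizer = AutoTokenizer.from_pretrained("gpt2")
    model = AutoModelForCausalLM.from_pretrained(
        "gpt2", torch_dtype=torch.bfloat16
    ).to("cuda")
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
    output_ids = model.generate(**inputs, max_new_tokens=64)
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)

The trade-off is visible in the diff itself: model_inference and search now reload their checkpoints on every invocation. A common refinement is to load once and cache (for example in a module-level variable) so that subsequent calls skip the reload.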