Safetensors
English
vidore
manu committed on
Commit
5b7aafb
1 Parent(s): 4951b32

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
adapter_config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "./models/paligemma-3b-mix-448",
5
+ "bias": "none",
6
+ "fan_in_fan_out": false,
7
+ "inference_mode": false,
8
+ "init_lora_weights": "gaussian",
9
+ "layer_replication": null,
10
+ "layers_pattern": null,
11
+ "layers_to_transform": null,
12
+ "loftq_config": {},
13
+ "lora_alpha": 32,
14
+ "lora_dropout": 0.1,
15
+ "megatron_config": null,
16
+ "megatron_core": "megatron.core",
17
+ "modules_to_save": null,
18
+ "peft_type": "LORA",
19
+ "r": 32,
20
+ "rank_pattern": {},
21
+ "revision": null,
22
+ "target_modules": "(.*(language_model).*(down_proj|gate_proj|up_proj|k_proj|q_proj|v_proj|o_proj).*$)",
23
+ "task_type": "FEATURE_EXTRACTION",
24
+ "use_dora": false,
25
+ "use_rslora": false
26
+ }
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37497e6dd0990a082ebcecf578f3503b257392d3fc23957f8d13357907063770
3
+ size 78485616
added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "<image>": 257152
3
+ }
git_hash.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ 83ba45daf034b63549ce091c9fc7d1b05d17381a
preprocessor_config.json ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_valid_processor_keys": [
3
+ "images",
4
+ "do_resize",
5
+ "size",
6
+ "resample",
7
+ "do_rescale",
8
+ "rescale_factor",
9
+ "do_normalize",
10
+ "image_mean",
11
+ "image_std",
12
+ "return_tensors",
13
+ "data_format",
14
+ "input_data_format",
15
+ "do_convert_rgb"
16
+ ],
17
+ "do_convert_rgb": null,
18
+ "do_normalize": true,
19
+ "do_rescale": true,
20
+ "do_resize": true,
21
+ "image_mean": [
22
+ 0.5,
23
+ 0.5,
24
+ 0.5
25
+ ],
26
+ "image_processor_type": "SiglipImageProcessor",
27
+ "image_seq_length": 1024,
28
+ "image_std": [
29
+ 0.5,
30
+ 0.5,
31
+ 0.5
32
+ ],
33
+ "processor_class": "PaliGemmaProcessor",
34
+ "resample": 3,
35
+ "rescale_factor": 0.00392156862745098,
36
+ "size": {
37
+ "height": 448,
38
+ "width": 448
39
+ }
40
+ }
results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"validation_set": {"ndcg_at_1": 0.56, "ndcg_at_3": 0.66593, "ndcg_at_5": 0.70225, "ndcg_at_10": 0.72141, "ndcg_at_20": 0.7306, "ndcg_at_100": 0.74127, "ndcg_at_1000": 0.74403, "map_at_1": 0.56, "map_at_3": 0.63967, "map_at_5": 0.65987, "map_at_10": 0.66801, "map_at_20": 0.67058, "map_at_100": 0.67215, "map_at_1000": 0.67228, "recall_at_1": 0.56, "recall_at_3": 0.742, "recall_at_5": 0.83, "recall_at_10": 0.888, "recall_at_20": 0.924, "recall_at_100": 0.98, "recall_at_1000": 1.0, "precision_at_1": 0.56, "precision_at_3": 0.24733, "precision_at_5": 0.166, "precision_at_10": 0.0888, "precision_at_20": 0.0462, "precision_at_100": 0.0098, "precision_at_1000": 0.001, "mrr_at_1": 0.56, "mrr_at_3": 0.6399999999999999, "mrr_at_5": 0.6600999999999996, "mrr_at_10": 0.6679071428571424, "mrr_at_20": 0.670459522666875, "mrr_at_100": 0.6720264773823135, "mrr_at_1000": 0.6721632664080629, "naucs_at_1_max": 0.4581627389936115, "naucs_at_1_std": -0.11351351351351392, "naucs_at_1_diff1": 0.728618624934829, "naucs_at_3_max": 0.579562054762704, "naucs_at_3_std": 0.09742800830806846, "naucs_at_3_diff1": 0.6544501861824129, "naucs_at_5_max": 0.6313419877473956, "naucs_at_5_std": 0.2794160329714977, "naucs_at_5_diff1": 0.629973768076523, "naucs_at_10_max": 0.6300174229818258, "naucs_at_10_std": 0.42831777597402365, "naucs_at_10_diff1": 0.6088737246626365, "naucs_at_20_max": 0.656974033820863, "naucs_at_20_std": 0.45815519190132126, "naucs_at_20_diff1": 0.6552402258712118, "naucs_at_100_max": 0.884892496472819, "naucs_at_100_std": 0.8289449112978374, "naucs_at_100_diff1": 0.8062405273284029, "naucs_at_1000_max": 1.0, "naucs_at_1000_std": 1.0, "naucs_at_1000_diff1": 1.0}, "syntheticDocQA_energy": {"ndcg_at_1": 0.45, "ndcg_at_3": 0.59464, "ndcg_at_5": 0.61917, "ndcg_at_10": 0.63848, "ndcg_at_20": 0.65715, "ndcg_at_100": 0.67221, "ndcg_at_1000": 0.67615, "map_at_1": 0.45, "map_at_3": 0.55833, "map_at_5": 0.57183, "map_at_10": 0.57973, "map_at_20": 0.58537, "map_at_100": 0.58753, 
"map_at_1000": 0.5877, "recall_at_1": 0.45, "recall_at_3": 0.7, "recall_at_5": 0.76, "recall_at_10": 0.82, "recall_at_20": 0.89, "recall_at_100": 0.97, "recall_at_1000": 1.0, "precision_at_1": 0.45, "precision_at_3": 0.23333, "precision_at_5": 0.152, "precision_at_10": 0.082, "precision_at_20": 0.0445, "precision_at_100": 0.0097, "precision_at_1000": 0.001, "mrr_at_1": 0.46, "mrr_at_3": 0.5650000000000001, "mrr_at_5": 0.5785000000000001, "mrr_at_10": 0.5863968253968255, "mrr_at_20": 0.5926057918689499, "mrr_at_100": 0.5944153700491968, "mrr_at_1000": 0.5944869489478762, "naucs_at_1_max": 0.27760072881178643, "naucs_at_1_std": -0.3205133241668304, "naucs_at_1_diff1": 0.7809847197023866, "naucs_at_3_max": 0.16591813737969383, "naucs_at_3_std": -0.44570347685101713, "naucs_at_3_diff1": 0.7213709711338673, "naucs_at_5_max": 0.24087976272738487, "naucs_at_5_std": -0.44771567015661423, "naucs_at_5_diff1": 0.7787578618027039, "naucs_at_10_max": 0.20034013728563488, "naucs_at_10_std": -0.4055512721665354, "naucs_at_10_diff1": 0.7419061481152793, "naucs_at_20_max": 0.019486061824278107, "naucs_at_20_std": -0.6947427196976204, "naucs_at_20_diff1": 0.6664907794114918, "naucs_at_100_max": 0.765633257302231, "naucs_at_100_std": 0.3123249299719853, "naucs_at_100_diff1": 0.574147400352231, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "syntheticDocQA_healthcare_industry": {"ndcg_at_1": 0.56, "ndcg_at_3": 0.71488, "ndcg_at_5": 0.73553, "ndcg_at_10": 0.75802, "ndcg_at_20": 0.76846, "ndcg_at_100": 0.77166, "ndcg_at_1000": 0.77166, "map_at_1": 0.56, "map_at_3": 0.67833, "map_at_5": 0.68983, "map_at_10": 0.69903, "map_at_20": 0.70206, "map_at_100": 0.70233, "map_at_1000": 0.70233, "recall_at_1": 0.56, "recall_at_3": 0.82, "recall_at_5": 0.87, "recall_at_10": 0.94, "recall_at_20": 0.98, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.56, "precision_at_3": 0.27333, "precision_at_5": 0.174, "precision_at_10": 0.094, "precision_at_20": 
0.049, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.57, "mrr_at_3": 0.685, "mrr_at_5": 0.6965, "mrr_at_10": 0.705873015873016, "mrr_at_20": 0.7089088036146862, "mrr_at_100": 0.7091800085655415, "mrr_at_1000": 0.7091800085655415, "naucs_at_1_max": 0.27151235556191416, "naucs_at_1_std": -0.34398421530774514, "naucs_at_1_diff1": 0.6718377674524622, "naucs_at_3_max": 0.6088638819784546, "naucs_at_3_std": -0.24564930058376347, "naucs_at_3_diff1": 0.5214627680577304, "naucs_at_5_max": 0.6786300711804086, "naucs_at_5_std": -0.418388506257865, "naucs_at_5_diff1": 0.6170382803076527, "naucs_at_10_max": 0.6709272412785969, "naucs_at_10_std": -0.6386554621848701, "naucs_at_10_diff1": 0.4912736526208764, "naucs_at_20_max": 1.0, "naucs_at_20_std": -0.9556489262371616, "naucs_at_20_diff1": 0.6208501444642268, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "syntheticDocQA_artificial_intelligence_test": {"ndcg_at_1": 0.59, "ndcg_at_3": 0.68309, "ndcg_at_5": 0.71193, "ndcg_at_10": 0.72446, "ndcg_at_20": 0.73258, "ndcg_at_100": 0.74885, "ndcg_at_1000": 0.75154, "map_at_1": 0.59, "map_at_3": 0.66, "map_at_5": 0.676, "map_at_10": 0.68093, "map_at_20": 0.68344, "map_at_100": 0.68556, "map_at_1000": 0.68568, "recall_at_1": 0.59, "recall_at_3": 0.75, "recall_at_5": 0.82, "recall_at_10": 0.86, "recall_at_20": 0.89, "recall_at_100": 0.98, "recall_at_1000": 1.0, "precision_at_1": 0.59, "precision_at_3": 0.25, "precision_at_5": 0.164, "precision_at_10": 0.086, "precision_at_20": 0.0445, "precision_at_100": 0.0098, "precision_at_1000": 0.001, "mrr_at_1": 0.59, "mrr_at_3": 0.66, "mrr_at_5": 0.676, "mrr_at_10": 0.6809285714285714, "mrr_at_20": 0.6835159840159841, "mrr_at_100": 0.6856355014700788, "mrr_at_1000": 0.6857511979563335, "naucs_at_1_max": 0.3078480331201424, "naucs_at_1_std": -0.329024021600364, "naucs_at_1_diff1": 0.7385072980027542, "naucs_at_3_max": 
0.20443257357361794, "naucs_at_3_std": -0.5065829408686535, "naucs_at_3_diff1": 0.563588991665363, "naucs_at_5_max": 0.3715945431261306, "naucs_at_5_std": -0.4646987553695312, "naucs_at_5_diff1": 0.462532944976151, "naucs_at_10_max": 0.4858115646945892, "naucs_at_10_std": -0.2864070943605361, "naucs_at_10_diff1": 0.46357985000803525, "naucs_at_20_max": 0.514081966198569, "naucs_at_20_std": 0.07099905506399802, "naucs_at_20_diff1": 0.3885420013202417, "naucs_at_100_max": 0.9346405228758174, "naucs_at_100_std": 0.41433239962652313, "naucs_at_100_diff1": 0.464766298676341, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "syntheticDocQA_government_reports": {"ndcg_at_1": 0.57, "ndcg_at_3": 0.70464, "ndcg_at_5": 0.73778, "ndcg_at_10": 0.7563, "ndcg_at_20": 0.76396, "ndcg_at_100": 0.76961, "ndcg_at_1000": 0.76961, "map_at_1": 0.57, "map_at_3": 0.67167, "map_at_5": 0.69017, "map_at_10": 0.69731, "map_at_20": 0.69946, "map_at_100": 0.70026, "map_at_1000": 0.70026, "recall_at_1": 0.57, "recall_at_3": 0.8, "recall_at_5": 0.88, "recall_at_10": 0.94, "recall_at_20": 0.97, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.57, "precision_at_3": 0.26667, "precision_at_5": 0.176, "precision_at_10": 0.094, "precision_at_20": 0.0485, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.58, "mrr_at_3": 0.6833333333333332, "mrr_at_5": 0.6993333333333334, "mrr_at_10": 0.7064722222222223, "mrr_at_20": 0.7086511544011544, "mrr_at_100": 0.7094792405518213, "mrr_at_1000": 0.7094792405518213, "naucs_at_1_max": -0.09710136653725754, "naucs_at_1_std": -0.30902254560066594, "naucs_at_1_diff1": 0.593247789506953, "naucs_at_3_max": 0.24695069854364632, "naucs_at_3_std": -0.351298701298701, "naucs_at_3_diff1": 0.3596640210172509, "naucs_at_5_max": 0.45257931081584857, "naucs_at_5_std": -0.18887209672287442, "naucs_at_5_diff1": 0.5140637591124506, "naucs_at_10_max": 0.5333665651709372, "naucs_at_10_std": -0.36718020541549845, 
"naucs_at_10_diff1": 0.6704850488291154, "naucs_at_20_max": 0.3972009878288819, "naucs_at_20_std": -0.6484593837535011, "naucs_at_20_diff1": 0.531411387933818, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "infovqa_subsampled": {"ndcg_at_1": 0.546, "ndcg_at_3": 0.64462, "ndcg_at_5": 0.67396, "ndcg_at_10": 0.68986, "ndcg_at_20": 0.70191, "ndcg_at_100": 0.71599, "ndcg_at_1000": 0.72174, "map_at_1": 0.546, "map_at_3": 0.62, "map_at_5": 0.6361, "map_at_10": 0.64288, "map_at_20": 0.64642, "map_at_100": 0.64849, "map_at_1000": 0.64872, "recall_at_1": 0.546, "recall_at_3": 0.716, "recall_at_5": 0.788, "recall_at_10": 0.836, "recall_at_20": 0.882, "recall_at_100": 0.956, "recall_at_1000": 1.0, "precision_at_1": 0.546, "precision_at_3": 0.23867, "precision_at_5": 0.1576, "precision_at_10": 0.0836, "precision_at_20": 0.0441, "precision_at_100": 0.00956, "precision_at_1000": 0.001, "mrr_at_1": 0.548, "mrr_at_3": 0.6203333333333334, "mrr_at_5": 0.6367333333333329, "mrr_at_10": 0.6434666666666662, "mrr_at_20": 0.6470144954718479, "mrr_at_100": 0.6490851952778098, "mrr_at_1000": 0.6493156881011598, "naucs_at_1_max": 0.24633576399288384, "naucs_at_1_std": -0.2223154338956795, "naucs_at_1_diff1": 0.6591834210834445, "naucs_at_3_max": 0.36175195459160975, "naucs_at_3_std": -0.08434084789609861, "naucs_at_3_diff1": 0.5413592329290657, "naucs_at_5_max": 0.3101751726254579, "naucs_at_5_std": -0.12780573032527914, "naucs_at_5_diff1": 0.5441719537773002, "naucs_at_10_max": 0.3977732303153332, "naucs_at_10_std": -0.03636821770535881, "naucs_at_10_diff1": 0.5382985443943215, "naucs_at_20_max": 0.6622591725210398, "naucs_at_20_std": 0.26017697686059826, "naucs_at_20_diff1": 0.45866043763190284, "naucs_at_100_max": 0.893549326391487, "naucs_at_100_std": 0.7262965792377566, "naucs_at_100_diff1": 0.41690219846423626, "naucs_at_1000_max": 1.0, "naucs_at_1000_std": 1.0, 
"naucs_at_1000_diff1": 1.0}, "docvqa_subsampled": {"ndcg_at_1": 0.2, "ndcg_at_3": 0.2659, "ndcg_at_5": 0.29973, "ndcg_at_10": 0.33819, "ndcg_at_20": 0.35639, "ndcg_at_100": 0.3948, "ndcg_at_1000": 0.42122, "map_at_1": 0.2, "map_at_3": 0.24933, "map_at_5": 0.26813, "map_at_10": 0.2838, "map_at_20": 0.2888, "map_at_100": 0.29417, "map_at_1000": 0.29516, "recall_at_1": 0.2, "recall_at_3": 0.314, "recall_at_5": 0.396, "recall_at_10": 0.516, "recall_at_20": 0.588, "recall_at_100": 0.794, "recall_at_1000": 1.0, "precision_at_1": 0.2, "precision_at_3": 0.10467, "precision_at_5": 0.0792, "precision_at_10": 0.0516, "precision_at_20": 0.0294, "precision_at_100": 0.00794, "precision_at_1000": 0.001, "mrr_at_1": 0.198, "mrr_at_3": 0.24899999999999986, "mrr_at_5": 0.26639999999999997, "mrr_at_10": 0.2826722222222223, "mrr_at_20": 0.2877351715192274, "mrr_at_100": 0.2930046189764325, "mrr_at_1000": 0.2939967523796152, "naucs_at_1_max": 0.2258426518112759, "naucs_at_1_std": -0.016493380690991176, "naucs_at_1_diff1": 0.49799965761911646, "naucs_at_3_max": 0.3104056776517514, "naucs_at_3_std": 0.09240537922113469, "naucs_at_3_diff1": 0.38644937393582046, "naucs_at_5_max": 0.3597126264092387, "naucs_at_5_std": 0.1764766577414579, "naucs_at_5_diff1": 0.3693136135912796, "naucs_at_10_max": 0.4557531098689828, "naucs_at_10_std": 0.3056914665929765, "naucs_at_10_diff1": 0.30113638267628107, "naucs_at_20_max": 0.48037271510149393, "naucs_at_20_std": 0.362469326680371, "naucs_at_20_diff1": 0.2606503125736123, "naucs_at_100_max": 0.6849545172368271, "naucs_at_100_std": 0.6495330524650836, "naucs_at_100_diff1": 0.32818980935171466, "naucs_at_1000_max": 1.0, "naucs_at_1000_std": 1.0, "naucs_at_1000_diff1": 1.0}, "arxivqa_subsampled": {"ndcg_at_1": 0.464, "ndcg_at_3": 0.54274, "ndcg_at_5": 0.56494, "ndcg_at_10": 0.59419, "ndcg_at_20": 0.61634, "ndcg_at_100": 0.6426, "ndcg_at_1000": 0.64621, "map_at_1": 0.464, "map_at_3": 0.523, "map_at_5": 0.5353, "map_at_10": 0.54745, "map_at_20": 0.55347, 
"map_at_100": 0.5571, "map_at_1000": 0.55728, "recall_at_1": 0.464, "recall_at_3": 0.6, "recall_at_5": 0.654, "recall_at_10": 0.744, "recall_at_20": 0.832, "recall_at_100": 0.974, "recall_at_1000": 1.0, "precision_at_1": 0.464, "precision_at_3": 0.2, "precision_at_5": 0.1308, "precision_at_10": 0.0744, "precision_at_20": 0.0416, "precision_at_100": 0.00974, "precision_at_1000": 0.001, "mrr_at_1": 0.466, "mrr_at_3": 0.5240000000000001, "mrr_at_5": 0.5361000000000001, "mrr_at_10": 0.5482619047619046, "mrr_at_20": 0.5544001758045876, "mrr_at_100": 0.5579262752668372, "mrr_at_1000": 0.5581100898998852, "naucs_at_1_max": 0.39174526188776193, "naucs_at_1_std": -0.19776885342540754, "naucs_at_1_diff1": 0.629043919334302, "naucs_at_3_max": 0.3165996074228971, "naucs_at_3_std": -0.27436034431582085, "naucs_at_3_diff1": 0.5443034356824094, "naucs_at_5_max": 0.30353705337173154, "naucs_at_5_std": -0.31223778173599515, "naucs_at_5_diff1": 0.5368106159809729, "naucs_at_10_max": 0.27296301058677275, "naucs_at_10_std": -0.40394881650487086, "naucs_at_10_diff1": 0.5336611081025069, "naucs_at_20_max": 0.218393433986842, "naucs_at_20_std": -0.43578939955870744, "naucs_at_20_diff1": 0.483764567978014, "naucs_at_100_max": 0.4432208249913428, "naucs_at_100_std": -0.6542770954535672, "naucs_at_100_diff1": 0.5900161947807036, "naucs_at_1000_max": 1.0, "naucs_at_1000_std": 1.0, "naucs_at_1000_diff1": -0.0}, "tabfquad_subsampled": {"ndcg_at_1": 0.63214, "ndcg_at_3": 0.75076, "ndcg_at_5": 0.76906, "ndcg_at_10": 0.79036, "ndcg_at_20": 0.80322, "ndcg_at_100": 0.80465, "ndcg_at_1000": 0.80465, "map_at_1": 0.63214, "map_at_3": 0.72143, "map_at_5": 0.73196, "map_at_10": 0.74104, "map_at_20": 0.74468, "map_at_100": 0.74491, "map_at_1000": 0.74491, "recall_at_1": 0.63214, "recall_at_3": 0.83571, "recall_at_5": 0.87857, "recall_at_10": 0.94286, "recall_at_20": 0.99286, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.63214, "precision_at_3": 0.27857, "precision_at_5": 0.17571, 
"precision_at_10": 0.09429, "precision_at_20": 0.04964, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.6321428571428571, "mrr_at_3": 0.7220238095238098, "mrr_at_5": 0.732559523809524, "mrr_at_10": 0.7415688775510205, "mrr_at_20": 0.7451886866266567, "mrr_at_100": 0.7454230025752585, "mrr_at_1000": 0.7454230025752585, "naucs_at_1_max": 0.38646770813543463, "naucs_at_1_std": -0.15405576999792106, "naucs_at_1_diff1": 0.770483153687804, "naucs_at_3_max": 0.4715304076938923, "naucs_at_3_std": -0.05538441791811068, "naucs_at_3_diff1": 0.5690504012176135, "naucs_at_5_max": 0.5005477996778747, "naucs_at_5_std": -0.05779501883398043, "naucs_at_5_diff1": 0.5311432702346002, "naucs_at_10_max": 0.5665646111001326, "naucs_at_10_std": -0.14066876750700116, "naucs_at_10_diff1": 0.579542959824386, "naucs_at_20_max": 0.5639455856997528, "naucs_at_20_std": 0.8611111111111035, "naucs_at_20_diff1": 0.41985116196939604, "naucs_at_100_max": -0.0, "naucs_at_100_std": 1.0, "naucs_at_100_diff1": -0.0, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "tatdqa": {"ndcg_at_1": 0.20385, "ndcg_at_3": 0.29367, "ndcg_at_5": 0.33414, "ndcg_at_10": 0.38255, "ndcg_at_20": 0.42025, "ndcg_at_100": 0.45904, "ndcg_at_1000": 0.46423, "map_at_1": 0.20385, "map_at_3": 0.2708, "map_at_5": 0.29316, "map_at_10": 0.31302, "map_at_20": 0.32337, "map_at_100": 0.32908, "map_at_1000": 0.32933, "recall_at_1": 0.20385, "recall_at_3": 0.36019, "recall_at_5": 0.45881, "recall_at_10": 0.60914, "recall_at_20": 0.75827, "recall_at_100": 0.96212, "recall_at_1000": 1.0, "precision_at_1": 0.20385, "precision_at_3": 0.12006, "precision_at_5": 0.09176, "precision_at_10": 0.06091, "precision_at_20": 0.03791, "precision_at_100": 0.00962, "precision_at_1000": 0.001, "mrr_at_1": 0.20565243535778713, "mrr_at_3": 0.27189817598717125, "mrr_at_5": 0.2949589096011215, "mrr_at_10": 0.3140590250933003, "mrr_at_20": 0.3243668643319153, "mrr_at_100": 0.330131124419447, "mrr_at_1000": 
0.3303795355815673, "naucs_at_1_max": 0.053143613279064535, "naucs_at_1_std": -0.14737481207092906, "naucs_at_1_diff1": 0.2721691321865769, "naucs_at_3_max": 0.061009773438048504, "naucs_at_3_std": -0.16493988734649304, "naucs_at_3_diff1": 0.1581663877851112, "naucs_at_5_max": 0.10526555258170192, "naucs_at_5_std": -0.14858934849303054, "naucs_at_5_diff1": 0.1462139517163512, "naucs_at_10_max": 0.20118614671927704, "naucs_at_10_std": -0.06748566440997046, "naucs_at_10_diff1": 0.06717152271092251, "naucs_at_20_max": 0.3405327527321, "naucs_at_20_std": 0.02827223144042943, "naucs_at_20_diff1": 0.02230439840528239, "naucs_at_100_max": 0.7336818476431518, "naucs_at_100_std": 0.638654029156295, "naucs_at_100_diff1": 0.2602460053507242, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "shift_project": {"ndcg_at_1": 0.34, "ndcg_at_3": 0.40047, "ndcg_at_5": 0.43704, "ndcg_at_10": 0.47408, "ndcg_at_20": 0.50402, "ndcg_at_100": 0.53323, "ndcg_at_1000": 0.5442, "map_at_1": 0.34, "map_at_3": 0.38667, "map_at_5": 0.40667, "map_at_10": 0.42282, "map_at_20": 0.43082, "map_at_100": 0.43465, "map_at_1000": 0.43518, "recall_at_1": 0.34, "recall_at_3": 0.44, "recall_at_5": 0.53, "recall_at_10": 0.64, "recall_at_20": 0.76, "recall_at_100": 0.92, "recall_at_1000": 1.0, "precision_at_1": 0.34, "precision_at_3": 0.14667, "precision_at_5": 0.106, "precision_at_10": 0.064, "precision_at_20": 0.038, "precision_at_100": 0.0092, "precision_at_1000": 0.001, "mrr_at_1": 0.34, "mrr_at_3": 0.38666666666666666, "mrr_at_5": 0.41116666666666674, "mrr_at_10": 0.4255912698412699, "mrr_at_20": 0.43316782194413783, "mrr_at_100": 0.43710276049993996, "mrr_at_1000": 0.4376442513510576, "naucs_at_1_max": -0.07132528035307659, "naucs_at_1_std": -0.28039958663451625, "naucs_at_1_diff1": 0.4762828451227933, "naucs_at_3_max": -0.05471935800275327, "naucs_at_3_std": -0.2769744740825807, "naucs_at_3_diff1": 0.41559115837216676, "naucs_at_5_max": -0.2575223921084339, 
"naucs_at_5_std": -0.4371606482563329, "naucs_at_5_diff1": 0.34598549622214064, "naucs_at_10_max": -0.383816400740656, "naucs_at_10_std": -0.5900338485119421, "naucs_at_10_diff1": 0.41202909447973185, "naucs_at_20_max": -0.3554185063876152, "naucs_at_20_std": -0.6295318854373965, "naucs_at_20_diff1": 0.3329256757553939, "naucs_at_100_max": 0.012537707390648677, "naucs_at_100_std": -0.14256535947712445, "naucs_at_100_diff1": 0.19171033524016706, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}}
special_tokens_map.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ {
4
+ "content": "<image>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ }
10
+ ],
11
+ "bos_token": {
12
+ "content": "<bos>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false
17
+ },
18
+ "eos_token": {
19
+ "content": "<eos>",
20
+ "lstrip": false,
21
+ "normalized": false,
22
+ "rstrip": false,
23
+ "single_word": false
24
+ },
25
+ "pad_token": {
26
+ "content": "<pad>",
27
+ "lstrip": false,
28
+ "normalized": false,
29
+ "rstrip": false,
30
+ "single_word": false
31
+ },
32
+ "unk_token": {
33
+ "content": "<unk>",
34
+ "lstrip": false,
35
+ "normalized": false,
36
+ "rstrip": false,
37
+ "single_word": false
38
+ }
39
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1df2ab04780faccf51a881d7c5a7026cc6f979083af2eebf709d051b8d47134b
3
+ size 17763458
tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8986bb4f423f07f8c7f70d0dbe3526fb2316056c17bae71b1ea975e77a168fc6
3
+ size 4264023
tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff
 
training_config.yml ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config:
2
+ (): custom_colbert.utils.train_custom_colbert_models.ColModelTrainingConfig
3
+ output_dir: !path ../../../models/without_tabfquad_no_pairwise/train_bipali_mean-3b-mix-448
4
+ processor:
5
+ (): custom_colbert.utils.wrapper.AutoProcessorWrapper
6
+ pretrained_model_name_or_path: "./models/paligemma-3b-mix-448"
7
+ max_length: 50
8
+ model:
9
+ (): custom_colbert.utils.wrapper.AutoColModelWrapper
10
+ pretrained_model_name_or_path: "./models/paligemma-3b-mix-448"
11
+ training_objective: "biencoder_mean"
12
+ # attn_implementation: "eager"
13
+ torch_dtype: !ext torch.bfloat16
14
+ # device_map: "auto"
15
+ # quantization_config:
16
+ # (): transformers.BitsAndBytesConfig
17
+ # load_in_4bit: true
18
+ # bnb_4bit_quant_type: "nf4"
19
+ # bnb_4bit_compute_dtype: "bfloat16"
20
+ # bnb_4bit_use_double_quant: true
21
+
22
+ dataset_loading_func: !ext custom_colbert.utils.dataset_transformation.load_train_set
23
+ eval_dataset_loader: !import ../data/test_data.yaml
24
+
25
+ max_length: 50
26
+ run_eval: true
27
+ add_suffix: true
28
+ loss_func:
29
+ (): custom_colbert.loss.colbert_loss.BiEncoderLoss
30
+ tr_args: !import ../tr_args/default_tr_args.yaml
31
+ peft_config:
32
+ (): peft.LoraConfig
33
+ r: 32
34
+ lora_alpha: 32
35
+ lora_dropout: 0.1
36
+ init_lora_weights: "gaussian"
37
+ bias: "none"
38
+ task_type: "FEATURE_EXTRACTION"
39
+ target_modules: '(.*(language_model).*(down_proj|gate_proj|up_proj|k_proj|q_proj|v_proj|o_proj).*$)'
40
+ # target_modules: '(.*(language_model).*(down_proj|gate_proj|up_proj|k_proj|q_proj|v_proj|o_proj).*$'
41
+