Upload folder using huggingface_hub
Browse files- .gitattributes +1 -0
- adapter_config.json +26 -0
- adapter_model.safetensors +3 -0
- added_tokens.json +3 -0
- git_hash.txt +1 -0
- preprocessor_config.json +40 -0
- results.json +1 -0
- special_tokens_map.json +39 -0
- tokenizer.json +3 -0
- tokenizer.model +3 -0
- tokenizer_config.json +0 -0
- training_config.yml +41 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
adapter_config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"alpha_pattern": {},
|
3 |
+
"auto_mapping": null,
|
4 |
+
"base_model_name_or_path": "./models/paligemma-3b-mix-448",
|
5 |
+
"bias": "none",
|
6 |
+
"fan_in_fan_out": false,
|
7 |
+
"inference_mode": false,
|
8 |
+
"init_lora_weights": "gaussian",
|
9 |
+
"layer_replication": null,
|
10 |
+
"layers_pattern": null,
|
11 |
+
"layers_to_transform": null,
|
12 |
+
"loftq_config": {},
|
13 |
+
"lora_alpha": 32,
|
14 |
+
"lora_dropout": 0.1,
|
15 |
+
"megatron_config": null,
|
16 |
+
"megatron_core": "megatron.core",
|
17 |
+
"modules_to_save": null,
|
18 |
+
"peft_type": "LORA",
|
19 |
+
"r": 32,
|
20 |
+
"rank_pattern": {},
|
21 |
+
"revision": null,
|
22 |
+
"target_modules": "(.*(language_model).*(down_proj|gate_proj|up_proj|k_proj|q_proj|v_proj|o_proj).*$)",
|
23 |
+
"task_type": "FEATURE_EXTRACTION",
|
24 |
+
"use_dora": false,
|
25 |
+
"use_rslora": false
|
26 |
+
}
|
adapter_model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:37497e6dd0990a082ebcecf578f3503b257392d3fc23957f8d13357907063770
|
3 |
+
size 78485616
|
added_tokens.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"<image>": 257152
|
3 |
+
}
|
git_hash.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
83ba45daf034b63549ce091c9fc7d1b05d17381a
|
preprocessor_config.json
ADDED
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_valid_processor_keys": [
|
3 |
+
"images",
|
4 |
+
"do_resize",
|
5 |
+
"size",
|
6 |
+
"resample",
|
7 |
+
"do_rescale",
|
8 |
+
"rescale_factor",
|
9 |
+
"do_normalize",
|
10 |
+
"image_mean",
|
11 |
+
"image_std",
|
12 |
+
"return_tensors",
|
13 |
+
"data_format",
|
14 |
+
"input_data_format",
|
15 |
+
"do_convert_rgb"
|
16 |
+
],
|
17 |
+
"do_convert_rgb": null,
|
18 |
+
"do_normalize": true,
|
19 |
+
"do_rescale": true,
|
20 |
+
"do_resize": true,
|
21 |
+
"image_mean": [
|
22 |
+
0.5,
|
23 |
+
0.5,
|
24 |
+
0.5
|
25 |
+
],
|
26 |
+
"image_processor_type": "SiglipImageProcessor",
|
27 |
+
"image_seq_length": 1024,
|
28 |
+
"image_std": [
|
29 |
+
0.5,
|
30 |
+
0.5,
|
31 |
+
0.5
|
32 |
+
],
|
33 |
+
"processor_class": "PaliGemmaProcessor",
|
34 |
+
"resample": 3,
|
35 |
+
"rescale_factor": 0.00392156862745098,
|
36 |
+
"size": {
|
37 |
+
"height": 448,
|
38 |
+
"width": 448
|
39 |
+
}
|
40 |
+
}
|
results.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"validation_set": {"ndcg_at_1": 0.56, "ndcg_at_3": 0.66593, "ndcg_at_5": 0.70225, "ndcg_at_10": 0.72141, "ndcg_at_20": 0.7306, "ndcg_at_100": 0.74127, "ndcg_at_1000": 0.74403, "map_at_1": 0.56, "map_at_3": 0.63967, "map_at_5": 0.65987, "map_at_10": 0.66801, "map_at_20": 0.67058, "map_at_100": 0.67215, "map_at_1000": 0.67228, "recall_at_1": 0.56, "recall_at_3": 0.742, "recall_at_5": 0.83, "recall_at_10": 0.888, "recall_at_20": 0.924, "recall_at_100": 0.98, "recall_at_1000": 1.0, "precision_at_1": 0.56, "precision_at_3": 0.24733, "precision_at_5": 0.166, "precision_at_10": 0.0888, "precision_at_20": 0.0462, "precision_at_100": 0.0098, "precision_at_1000": 0.001, "mrr_at_1": 0.56, "mrr_at_3": 0.6399999999999999, "mrr_at_5": 0.6600999999999996, "mrr_at_10": 0.6679071428571424, "mrr_at_20": 0.670459522666875, "mrr_at_100": 0.6720264773823135, "mrr_at_1000": 0.6721632664080629, "naucs_at_1_max": 0.4581627389936115, "naucs_at_1_std": -0.11351351351351392, "naucs_at_1_diff1": 0.728618624934829, "naucs_at_3_max": 0.579562054762704, "naucs_at_3_std": 0.09742800830806846, "naucs_at_3_diff1": 0.6544501861824129, "naucs_at_5_max": 0.6313419877473956, "naucs_at_5_std": 0.2794160329714977, "naucs_at_5_diff1": 0.629973768076523, "naucs_at_10_max": 0.6300174229818258, "naucs_at_10_std": 0.42831777597402365, "naucs_at_10_diff1": 0.6088737246626365, "naucs_at_20_max": 0.656974033820863, "naucs_at_20_std": 0.45815519190132126, "naucs_at_20_diff1": 0.6552402258712118, "naucs_at_100_max": 0.884892496472819, "naucs_at_100_std": 0.8289449112978374, "naucs_at_100_diff1": 0.8062405273284029, "naucs_at_1000_max": 1.0, "naucs_at_1000_std": 1.0, "naucs_at_1000_diff1": 1.0}, "syntheticDocQA_energy": {"ndcg_at_1": 0.45, "ndcg_at_3": 0.59464, "ndcg_at_5": 0.61917, "ndcg_at_10": 0.63848, "ndcg_at_20": 0.65715, "ndcg_at_100": 0.67221, "ndcg_at_1000": 0.67615, "map_at_1": 0.45, "map_at_3": 0.55833, "map_at_5": 0.57183, "map_at_10": 0.57973, "map_at_20": 0.58537, "map_at_100": 0.58753, "map_at_1000": 0.5877, "recall_at_1": 0.45, "recall_at_3": 0.7, "recall_at_5": 0.76, "recall_at_10": 0.82, "recall_at_20": 0.89, "recall_at_100": 0.97, "recall_at_1000": 1.0, "precision_at_1": 0.45, "precision_at_3": 0.23333, "precision_at_5": 0.152, "precision_at_10": 0.082, "precision_at_20": 0.0445, "precision_at_100": 0.0097, "precision_at_1000": 0.001, "mrr_at_1": 0.46, "mrr_at_3": 0.5650000000000001, "mrr_at_5": 0.5785000000000001, "mrr_at_10": 0.5863968253968255, "mrr_at_20": 0.5926057918689499, "mrr_at_100": 0.5944153700491968, "mrr_at_1000": 0.5944869489478762, "naucs_at_1_max": 0.27760072881178643, "naucs_at_1_std": -0.3205133241668304, "naucs_at_1_diff1": 0.7809847197023866, "naucs_at_3_max": 0.16591813737969383, "naucs_at_3_std": -0.44570347685101713, "naucs_at_3_diff1": 0.7213709711338673, "naucs_at_5_max": 0.24087976272738487, "naucs_at_5_std": -0.44771567015661423, "naucs_at_5_diff1": 0.7787578618027039, "naucs_at_10_max": 0.20034013728563488, "naucs_at_10_std": -0.4055512721665354, "naucs_at_10_diff1": 0.7419061481152793, "naucs_at_20_max": 0.019486061824278107, "naucs_at_20_std": -0.6947427196976204, "naucs_at_20_diff1": 0.6664907794114918, "naucs_at_100_max": 0.765633257302231, "naucs_at_100_std": 0.3123249299719853, "naucs_at_100_diff1": 0.574147400352231, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "syntheticDocQA_healthcare_industry": {"ndcg_at_1": 0.56, "ndcg_at_3": 0.71488, "ndcg_at_5": 0.73553, "ndcg_at_10": 0.75802, "ndcg_at_20": 0.76846, "ndcg_at_100": 0.77166, "ndcg_at_1000": 0.77166, "map_at_1": 0.56, "map_at_3": 0.67833, "map_at_5": 0.68983, "map_at_10": 0.69903, "map_at_20": 0.70206, "map_at_100": 0.70233, "map_at_1000": 0.70233, "recall_at_1": 0.56, "recall_at_3": 0.82, "recall_at_5": 0.87, "recall_at_10": 0.94, "recall_at_20": 0.98, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.56, "precision_at_3": 0.27333, "precision_at_5": 0.174, "precision_at_10": 0.094, "precision_at_20": 0.049, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.57, "mrr_at_3": 0.685, "mrr_at_5": 0.6965, "mrr_at_10": 0.705873015873016, "mrr_at_20": 0.7089088036146862, "mrr_at_100": 0.7091800085655415, "mrr_at_1000": 0.7091800085655415, "naucs_at_1_max": 0.27151235556191416, "naucs_at_1_std": -0.34398421530774514, "naucs_at_1_diff1": 0.6718377674524622, "naucs_at_3_max": 0.6088638819784546, "naucs_at_3_std": -0.24564930058376347, "naucs_at_3_diff1": 0.5214627680577304, "naucs_at_5_max": 0.6786300711804086, "naucs_at_5_std": -0.418388506257865, "naucs_at_5_diff1": 0.6170382803076527, "naucs_at_10_max": 0.6709272412785969, "naucs_at_10_std": -0.6386554621848701, "naucs_at_10_diff1": 0.4912736526208764, "naucs_at_20_max": 1.0, "naucs_at_20_std": -0.9556489262371616, "naucs_at_20_diff1": 0.6208501444642268, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "syntheticDocQA_artificial_intelligence_test": {"ndcg_at_1": 0.59, "ndcg_at_3": 0.68309, "ndcg_at_5": 0.71193, "ndcg_at_10": 0.72446, "ndcg_at_20": 0.73258, "ndcg_at_100": 0.74885, "ndcg_at_1000": 0.75154, "map_at_1": 0.59, "map_at_3": 0.66, "map_at_5": 0.676, "map_at_10": 0.68093, "map_at_20": 0.68344, "map_at_100": 0.68556, "map_at_1000": 0.68568, "recall_at_1": 0.59, "recall_at_3": 0.75, "recall_at_5": 0.82, "recall_at_10": 0.86, "recall_at_20": 0.89, "recall_at_100": 0.98, "recall_at_1000": 1.0, "precision_at_1": 0.59, "precision_at_3": 0.25, "precision_at_5": 0.164, "precision_at_10": 0.086, "precision_at_20": 0.0445, "precision_at_100": 0.0098, "precision_at_1000": 0.001, "mrr_at_1": 0.59, "mrr_at_3": 0.66, "mrr_at_5": 0.676, "mrr_at_10": 0.6809285714285714, "mrr_at_20": 0.6835159840159841, "mrr_at_100": 0.6856355014700788, "mrr_at_1000": 0.6857511979563335, "naucs_at_1_max": 0.3078480331201424, "naucs_at_1_std": -0.329024021600364, "naucs_at_1_diff1": 0.7385072980027542, "naucs_at_3_max": 0.20443257357361794, "naucs_at_3_std": -0.5065829408686535, "naucs_at_3_diff1": 0.563588991665363, "naucs_at_5_max": 0.3715945431261306, "naucs_at_5_std": -0.4646987553695312, "naucs_at_5_diff1": 0.462532944976151, "naucs_at_10_max": 0.4858115646945892, "naucs_at_10_std": -0.2864070943605361, "naucs_at_10_diff1": 0.46357985000803525, "naucs_at_20_max": 0.514081966198569, "naucs_at_20_std": 0.07099905506399802, "naucs_at_20_diff1": 0.3885420013202417, "naucs_at_100_max": 0.9346405228758174, "naucs_at_100_std": 0.41433239962652313, "naucs_at_100_diff1": 0.464766298676341, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "syntheticDocQA_government_reports": {"ndcg_at_1": 0.57, "ndcg_at_3": 0.70464, "ndcg_at_5": 0.73778, "ndcg_at_10": 0.7563, "ndcg_at_20": 0.76396, "ndcg_at_100": 0.76961, "ndcg_at_1000": 0.76961, "map_at_1": 0.57, "map_at_3": 0.67167, "map_at_5": 0.69017, "map_at_10": 0.69731, "map_at_20": 0.69946, "map_at_100": 0.70026, "map_at_1000": 0.70026, "recall_at_1": 0.57, "recall_at_3": 0.8, "recall_at_5": 0.88, "recall_at_10": 0.94, "recall_at_20": 0.97, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.57, "precision_at_3": 0.26667, "precision_at_5": 0.176, "precision_at_10": 0.094, "precision_at_20": 0.0485, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.58, "mrr_at_3": 0.6833333333333332, "mrr_at_5": 0.6993333333333334, "mrr_at_10": 0.7064722222222223, "mrr_at_20": 0.7086511544011544, "mrr_at_100": 0.7094792405518213, "mrr_at_1000": 0.7094792405518213, "naucs_at_1_max": -0.09710136653725754, "naucs_at_1_std": -0.30902254560066594, "naucs_at_1_diff1": 0.593247789506953, "naucs_at_3_max": 0.24695069854364632, "naucs_at_3_std": -0.351298701298701, "naucs_at_3_diff1": 0.3596640210172509, "naucs_at_5_max": 0.45257931081584857, "naucs_at_5_std": -0.18887209672287442, "naucs_at_5_diff1": 0.5140637591124506, "naucs_at_10_max": 0.5333665651709372, "naucs_at_10_std": -0.36718020541549845, "naucs_at_10_diff1": 0.6704850488291154, "naucs_at_20_max": 0.3972009878288819, "naucs_at_20_std": -0.6484593837535011, "naucs_at_20_diff1": 0.531411387933818, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "infovqa_subsampled": {"ndcg_at_1": 0.546, "ndcg_at_3": 0.64462, "ndcg_at_5": 0.67396, "ndcg_at_10": 0.68986, "ndcg_at_20": 0.70191, "ndcg_at_100": 0.71599, "ndcg_at_1000": 0.72174, "map_at_1": 0.546, "map_at_3": 0.62, "map_at_5": 0.6361, "map_at_10": 0.64288, "map_at_20": 0.64642, "map_at_100": 0.64849, "map_at_1000": 0.64872, "recall_at_1": 0.546, "recall_at_3": 0.716, "recall_at_5": 0.788, "recall_at_10": 0.836, "recall_at_20": 0.882, "recall_at_100": 0.956, "recall_at_1000": 1.0, "precision_at_1": 0.546, "precision_at_3": 0.23867, "precision_at_5": 0.1576, "precision_at_10": 0.0836, "precision_at_20": 0.0441, "precision_at_100": 0.00956, "precision_at_1000": 0.001, "mrr_at_1": 0.548, "mrr_at_3": 0.6203333333333334, "mrr_at_5": 0.6367333333333329, "mrr_at_10": 0.6434666666666662, "mrr_at_20": 0.6470144954718479, "mrr_at_100": 0.6490851952778098, "mrr_at_1000": 0.6493156881011598, "naucs_at_1_max": 0.24633576399288384, "naucs_at_1_std": -0.2223154338956795, "naucs_at_1_diff1": 0.6591834210834445, "naucs_at_3_max": 0.36175195459160975, "naucs_at_3_std": -0.08434084789609861, "naucs_at_3_diff1": 0.5413592329290657, "naucs_at_5_max": 0.3101751726254579, "naucs_at_5_std": -0.12780573032527914, "naucs_at_5_diff1": 0.5441719537773002, "naucs_at_10_max": 0.3977732303153332, "naucs_at_10_std": -0.03636821770535881, "naucs_at_10_diff1": 0.5382985443943215, "naucs_at_20_max": 0.6622591725210398, "naucs_at_20_std": 0.26017697686059826, "naucs_at_20_diff1": 0.45866043763190284, "naucs_at_100_max": 0.893549326391487, "naucs_at_100_std": 0.7262965792377566, "naucs_at_100_diff1": 0.41690219846423626, "naucs_at_1000_max": 1.0, "naucs_at_1000_std": 1.0, "naucs_at_1000_diff1": 1.0}, "docvqa_subsampled": {"ndcg_at_1": 0.2, "ndcg_at_3": 0.2659, "ndcg_at_5": 0.29973, "ndcg_at_10": 0.33819, "ndcg_at_20": 0.35639, "ndcg_at_100": 0.3948, "ndcg_at_1000": 0.42122, "map_at_1": 0.2, "map_at_3": 0.24933, "map_at_5": 0.26813, "map_at_10": 0.2838, "map_at_20": 0.2888, "map_at_100": 0.29417, "map_at_1000": 0.29516, "recall_at_1": 0.2, "recall_at_3": 0.314, "recall_at_5": 0.396, "recall_at_10": 0.516, "recall_at_20": 0.588, "recall_at_100": 0.794, "recall_at_1000": 1.0, "precision_at_1": 0.2, "precision_at_3": 0.10467, "precision_at_5": 0.0792, "precision_at_10": 0.0516, "precision_at_20": 0.0294, "precision_at_100": 0.00794, "precision_at_1000": 0.001, "mrr_at_1": 0.198, "mrr_at_3": 0.24899999999999986, "mrr_at_5": 0.26639999999999997, "mrr_at_10": 0.2826722222222223, "mrr_at_20": 0.2877351715192274, "mrr_at_100": 0.2930046189764325, "mrr_at_1000": 0.2939967523796152, "naucs_at_1_max": 0.2258426518112759, "naucs_at_1_std": -0.016493380690991176, "naucs_at_1_diff1": 0.49799965761911646, "naucs_at_3_max": 0.3104056776517514, "naucs_at_3_std": 0.09240537922113469, "naucs_at_3_diff1": 0.38644937393582046, "naucs_at_5_max": 0.3597126264092387, "naucs_at_5_std": 0.1764766577414579, "naucs_at_5_diff1": 0.3693136135912796, "naucs_at_10_max": 0.4557531098689828, "naucs_at_10_std": 0.3056914665929765, "naucs_at_10_diff1": 0.30113638267628107, "naucs_at_20_max": 0.48037271510149393, "naucs_at_20_std": 0.362469326680371, "naucs_at_20_diff1": 0.2606503125736123, "naucs_at_100_max": 0.6849545172368271, "naucs_at_100_std": 0.6495330524650836, "naucs_at_100_diff1": 0.32818980935171466, "naucs_at_1000_max": 1.0, "naucs_at_1000_std": 1.0, "naucs_at_1000_diff1": 1.0}, "arxivqa_subsampled": {"ndcg_at_1": 0.464, "ndcg_at_3": 0.54274, "ndcg_at_5": 0.56494, "ndcg_at_10": 0.59419, "ndcg_at_20": 0.61634, "ndcg_at_100": 0.6426, "ndcg_at_1000": 0.64621, "map_at_1": 0.464, "map_at_3": 0.523, "map_at_5": 0.5353, "map_at_10": 0.54745, "map_at_20": 0.55347, "map_at_100": 0.5571, "map_at_1000": 0.55728, "recall_at_1": 0.464, "recall_at_3": 0.6, "recall_at_5": 0.654, "recall_at_10": 0.744, "recall_at_20": 0.832, "recall_at_100": 0.974, "recall_at_1000": 1.0, "precision_at_1": 0.464, "precision_at_3": 0.2, "precision_at_5": 0.1308, "precision_at_10": 0.0744, "precision_at_20": 0.0416, "precision_at_100": 0.00974, "precision_at_1000": 0.001, "mrr_at_1": 0.466, "mrr_at_3": 0.5240000000000001, "mrr_at_5": 0.5361000000000001, "mrr_at_10": 0.5482619047619046, "mrr_at_20": 0.5544001758045876, "mrr_at_100": 0.5579262752668372, "mrr_at_1000": 0.5581100898998852, "naucs_at_1_max": 0.39174526188776193, "naucs_at_1_std": -0.19776885342540754, "naucs_at_1_diff1": 0.629043919334302, "naucs_at_3_max": 0.3165996074228971, "naucs_at_3_std": -0.27436034431582085, "naucs_at_3_diff1": 0.5443034356824094, "naucs_at_5_max": 0.30353705337173154, "naucs_at_5_std": -0.31223778173599515, "naucs_at_5_diff1": 0.5368106159809729, "naucs_at_10_max": 0.27296301058677275, "naucs_at_10_std": -0.40394881650487086, "naucs_at_10_diff1": 0.5336611081025069, "naucs_at_20_max": 0.218393433986842, "naucs_at_20_std": -0.43578939955870744, "naucs_at_20_diff1": 0.483764567978014, "naucs_at_100_max": 0.4432208249913428, "naucs_at_100_std": -0.6542770954535672, "naucs_at_100_diff1": 0.5900161947807036, "naucs_at_1000_max": 1.0, "naucs_at_1000_std": 1.0, "naucs_at_1000_diff1": -0.0}, "tabfquad_subsampled": {"ndcg_at_1": 0.63214, "ndcg_at_3": 0.75076, "ndcg_at_5": 0.76906, "ndcg_at_10": 0.79036, "ndcg_at_20": 0.80322, "ndcg_at_100": 0.80465, "ndcg_at_1000": 0.80465, "map_at_1": 0.63214, "map_at_3": 0.72143, "map_at_5": 0.73196, "map_at_10": 0.74104, "map_at_20": 0.74468, "map_at_100": 0.74491, "map_at_1000": 0.74491, "recall_at_1": 0.63214, "recall_at_3": 0.83571, "recall_at_5": 0.87857, "recall_at_10": 0.94286, "recall_at_20": 0.99286, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.63214, "precision_at_3": 0.27857, "precision_at_5": 0.17571, "precision_at_10": 0.09429, "precision_at_20": 0.04964, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.6321428571428571, "mrr_at_3": 0.7220238095238098, "mrr_at_5": 0.732559523809524, "mrr_at_10": 0.7415688775510205, "mrr_at_20": 0.7451886866266567, "mrr_at_100": 0.7454230025752585, "mrr_at_1000": 0.7454230025752585, "naucs_at_1_max": 0.38646770813543463, "naucs_at_1_std": -0.15405576999792106, "naucs_at_1_diff1": 0.770483153687804, "naucs_at_3_max": 0.4715304076938923, "naucs_at_3_std": -0.05538441791811068, "naucs_at_3_diff1": 0.5690504012176135, "naucs_at_5_max": 0.5005477996778747, "naucs_at_5_std": -0.05779501883398043, "naucs_at_5_diff1": 0.5311432702346002, "naucs_at_10_max": 0.5665646111001326, "naucs_at_10_std": -0.14066876750700116, "naucs_at_10_diff1": 0.579542959824386, "naucs_at_20_max": 0.5639455856997528, "naucs_at_20_std": 0.8611111111111035, "naucs_at_20_diff1": 0.41985116196939604, "naucs_at_100_max": -0.0, "naucs_at_100_std": 1.0, "naucs_at_100_diff1": -0.0, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "tatdqa": {"ndcg_at_1": 0.20385, "ndcg_at_3": 0.29367, "ndcg_at_5": 0.33414, "ndcg_at_10": 0.38255, "ndcg_at_20": 0.42025, "ndcg_at_100": 0.45904, "ndcg_at_1000": 0.46423, "map_at_1": 0.20385, "map_at_3": 0.2708, "map_at_5": 0.29316, "map_at_10": 0.31302, "map_at_20": 0.32337, "map_at_100": 0.32908, "map_at_1000": 0.32933, "recall_at_1": 0.20385, "recall_at_3": 0.36019, "recall_at_5": 0.45881, "recall_at_10": 0.60914, "recall_at_20": 0.75827, "recall_at_100": 0.96212, "recall_at_1000": 1.0, "precision_at_1": 0.20385, "precision_at_3": 0.12006, "precision_at_5": 0.09176, "precision_at_10": 0.06091, "precision_at_20": 0.03791, "precision_at_100": 0.00962, "precision_at_1000": 0.001, "mrr_at_1": 0.20565243535778713, "mrr_at_3": 0.27189817598717125, "mrr_at_5": 0.2949589096011215, "mrr_at_10": 0.3140590250933003, "mrr_at_20": 0.3243668643319153, "mrr_at_100": 0.330131124419447, "mrr_at_1000": 0.3303795355815673, "naucs_at_1_max": 0.053143613279064535, "naucs_at_1_std": -0.14737481207092906, "naucs_at_1_diff1": 0.2721691321865769, "naucs_at_3_max": 0.061009773438048504, "naucs_at_3_std": -0.16493988734649304, "naucs_at_3_diff1": 0.1581663877851112, "naucs_at_5_max": 0.10526555258170192, "naucs_at_5_std": -0.14858934849303054, "naucs_at_5_diff1": 0.1462139517163512, "naucs_at_10_max": 0.20118614671927704, "naucs_at_10_std": -0.06748566440997046, "naucs_at_10_diff1": 0.06717152271092251, "naucs_at_20_max": 0.3405327527321, "naucs_at_20_std": 0.02827223144042943, "naucs_at_20_diff1": 0.02230439840528239, "naucs_at_100_max": 0.7336818476431518, "naucs_at_100_std": 0.638654029156295, "naucs_at_100_diff1": 0.2602460053507242, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "shift_project": {"ndcg_at_1": 0.34, "ndcg_at_3": 0.40047, "ndcg_at_5": 0.43704, "ndcg_at_10": 0.47408, "ndcg_at_20": 0.50402, "ndcg_at_100": 0.53323, "ndcg_at_1000": 0.5442, "map_at_1": 0.34, "map_at_3": 0.38667, "map_at_5": 0.40667, "map_at_10": 0.42282, "map_at_20": 0.43082, "map_at_100": 0.43465, "map_at_1000": 0.43518, "recall_at_1": 0.34, "recall_at_3": 0.44, "recall_at_5": 0.53, "recall_at_10": 0.64, "recall_at_20": 0.76, "recall_at_100": 0.92, "recall_at_1000": 1.0, "precision_at_1": 0.34, "precision_at_3": 0.14667, "precision_at_5": 0.106, "precision_at_10": 0.064, "precision_at_20": 0.038, "precision_at_100": 0.0092, "precision_at_1000": 0.001, "mrr_at_1": 0.34, "mrr_at_3": 0.38666666666666666, "mrr_at_5": 0.41116666666666674, "mrr_at_10": 0.4255912698412699, "mrr_at_20": 0.43316782194413783, "mrr_at_100": 0.43710276049993996, "mrr_at_1000": 0.4376442513510576, "naucs_at_1_max": -0.07132528035307659, "naucs_at_1_std": -0.28039958663451625, "naucs_at_1_diff1": 0.4762828451227933, "naucs_at_3_max": -0.05471935800275327, "naucs_at_3_std": -0.2769744740825807, "naucs_at_3_diff1": 0.41559115837216676, "naucs_at_5_max": -0.2575223921084339, "naucs_at_5_std": -0.4371606482563329, "naucs_at_5_diff1": 0.34598549622214064, "naucs_at_10_max": -0.383816400740656, "naucs_at_10_std": -0.5900338485119421, "naucs_at_10_diff1": 0.41202909447973185, "naucs_at_20_max": -0.3554185063876152, "naucs_at_20_std": -0.6295318854373965, "naucs_at_20_diff1": 0.3329256757553939, "naucs_at_100_max": 0.012537707390648677, "naucs_at_100_std": -0.14256535947712445, "naucs_at_100_diff1": 0.19171033524016706, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}}
|
special_tokens_map.json
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"additional_special_tokens": [
|
3 |
+
{
|
4 |
+
"content": "<image>",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false
|
9 |
+
}
|
10 |
+
],
|
11 |
+
"bos_token": {
|
12 |
+
"content": "<bos>",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false
|
17 |
+
},
|
18 |
+
"eos_token": {
|
19 |
+
"content": "<eos>",
|
20 |
+
"lstrip": false,
|
21 |
+
"normalized": false,
|
22 |
+
"rstrip": false,
|
23 |
+
"single_word": false
|
24 |
+
},
|
25 |
+
"pad_token": {
|
26 |
+
"content": "<pad>",
|
27 |
+
"lstrip": false,
|
28 |
+
"normalized": false,
|
29 |
+
"rstrip": false,
|
30 |
+
"single_word": false
|
31 |
+
},
|
32 |
+
"unk_token": {
|
33 |
+
"content": "<unk>",
|
34 |
+
"lstrip": false,
|
35 |
+
"normalized": false,
|
36 |
+
"rstrip": false,
|
37 |
+
"single_word": false
|
38 |
+
}
|
39 |
+
}
|
tokenizer.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1df2ab04780faccf51a881d7c5a7026cc6f979083af2eebf709d051b8d47134b
|
3 |
+
size 17763458
|
tokenizer.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8986bb4f423f07f8c7f70d0dbe3526fb2316056c17bae71b1ea975e77a168fc6
|
3 |
+
size 4264023
|
tokenizer_config.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
training_config.yml
ADDED
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
config:
|
2 |
+
(): custom_colbert.utils.train_custom_colbert_models.ColModelTrainingConfig
|
3 |
+
output_dir: !path ../../../models/without_tabfquad_no_pairwise/train_bipali_mean-3b-mix-448
|
4 |
+
processor:
|
5 |
+
() : custom_colbert.utils.wrapper.AutoProcessorWrapper
|
6 |
+
pretrained_model_name_or_path: "./models/paligemma-3b-mix-448"
|
7 |
+
max_length: 50
|
8 |
+
model:
|
9 |
+
(): custom_colbert.utils.wrapper.AutoColModelWrapper
|
10 |
+
pretrained_model_name_or_path: "./models/paligemma-3b-mix-448"
|
11 |
+
training_objective: "biencoder_mean"
|
12 |
+
# attn_implementation: "eager"
|
13 |
+
torch_dtype: !ext torch.bfloat16
|
14 |
+
# device_map: "auto"
|
15 |
+
# quantization_config:
|
16 |
+
# (): transformers.BitsAndBytesConfig
|
17 |
+
# load_in_4bit: true
|
18 |
+
# bnb_4bit_quant_type: "nf4"
|
19 |
+
# bnb_4bit_compute_dtype: "bfloat16"
|
20 |
+
# bnb_4bit_use_double_quant: true
|
21 |
+
|
22 |
+
dataset_loading_func: !ext custom_colbert.utils.dataset_transformation.load_train_set
|
23 |
+
eval_dataset_loader: !import ../data/test_data.yaml
|
24 |
+
|
25 |
+
max_length: 50
|
26 |
+
run_eval: true
|
27 |
+
add_suffix: true
|
28 |
+
loss_func:
|
29 |
+
(): custom_colbert.loss.colbert_loss.BiEncoderLoss
|
30 |
+
tr_args: !import ../tr_args/default_tr_args.yaml
|
31 |
+
peft_config:
|
32 |
+
(): peft.LoraConfig
|
33 |
+
r: 32
|
34 |
+
lora_alpha: 32
|
35 |
+
lora_dropout: 0.1
|
36 |
+
init_lora_weights: "gaussian"
|
37 |
+
bias: "none"
|
38 |
+
task_type: "FEATURE_EXTRACTION"
|
39 |
+
target_modules: '(.*(language_model).*(down_proj|gate_proj|up_proj|k_proj|q_proj|v_proj|o_proj).*$)'
|
40 |
+
# target_modules: '(.*(language_model).*(down_proj|gate_proj|up_proj|k_proj|q_proj|v_proj|o_proj).*$'
|
41 |
+
|