Upload folder using huggingface_hub
Browse files- adapter_config.json +26 -0
- adapter_model.safetensors +3 -0
- added_tokens.json +5 -0
- git_hash.txt +1 -0
- preprocessor_config.json +26 -0
- processor_config.json +4 -0
- results.json +1 -0
- special_tokens_map.json +53 -0
- tokenizer.json +0 -0
- tokenizer.model +3 -0
- tokenizer_config.json +72 -0
- training_config.yml +41 -0
adapter_config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"alpha_pattern": {},
|
3 |
+
"auto_mapping": null,
|
4 |
+
"base_model_name_or_path": "./models/idefics2-8b",
|
5 |
+
"bias": "none",
|
6 |
+
"fan_in_fan_out": false,
|
7 |
+
"inference_mode": false,
|
8 |
+
"init_lora_weights": "gaussian",
|
9 |
+
"layer_replication": null,
|
10 |
+
"layers_pattern": null,
|
11 |
+
"layers_to_transform": null,
|
12 |
+
"loftq_config": {},
|
13 |
+
"lora_alpha": 32,
|
14 |
+
"lora_dropout": 0.1,
|
15 |
+
"megatron_config": null,
|
16 |
+
"megatron_core": "megatron.core",
|
17 |
+
"modules_to_save": null,
|
18 |
+
"peft_type": "LORA",
|
19 |
+
"r": 32,
|
20 |
+
"rank_pattern": {},
|
21 |
+
"revision": null,
|
22 |
+
"target_modules": ".*(text_model).*(down_proj|gate_proj|up_proj|k_proj|q_proj|v_proj|o_proj).*$",
|
23 |
+
"task_type": "FEATURE_EXTRACTION",
|
24 |
+
"use_dora": false,
|
25 |
+
"use_rslora": false
|
26 |
+
}
|
adapter_model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:875109c2bc82a61d0d246082d1adf1dff4537e0c83d9a6cd45b9a6305dfc6ce9
|
3 |
+
size 167837616
|
added_tokens.json
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"<end_of_utterance>": 32002,
|
3 |
+
"<fake_token_around_image>": 32000,
|
4 |
+
"<image>": 32001
|
5 |
+
}
|
git_hash.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
6b50ed272234b6cab12154dac357307c9ee93783
|
preprocessor_config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"do_convert_rgb": true,
|
3 |
+
"do_image_splitting": false,
|
4 |
+
"do_normalize": true,
|
5 |
+
"do_pad": true,
|
6 |
+
"do_rescale": true,
|
7 |
+
"do_resize": true,
|
8 |
+
"image_mean": [
|
9 |
+
0.5,
|
10 |
+
0.5,
|
11 |
+
0.5
|
12 |
+
],
|
13 |
+
"image_processor_type": "Idefics2ImageProcessor",
|
14 |
+
"image_std": [
|
15 |
+
0.5,
|
16 |
+
0.5,
|
17 |
+
0.5
|
18 |
+
],
|
19 |
+
"processor_class": "Idefics2Processor",
|
20 |
+
"resample": 2,
|
21 |
+
"rescale_factor": 0.00392156862745098,
|
22 |
+
"size": {
|
23 |
+
"longest_edge": 980,
|
24 |
+
"shortest_edge": 378
|
25 |
+
}
|
26 |
+
}
|
processor_config.json
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"image_seq_len": 64,
|
3 |
+
"processor_class": "Idefics2Processor"
|
4 |
+
}
|
results.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"validation_set": {"ndcg_at_1": 0.758, "ndcg_at_3": 0.82121, "ndcg_at_5": 0.8386, "ndcg_at_10": 0.84778, "ndcg_at_20": 0.85272, "ndcg_at_100": 0.85878, "ndcg_at_1000": 0.86067, "map_at_1": 0.758, "map_at_3": 0.80633, "map_at_5": 0.81603, "map_at_10": 0.8199, "map_at_20": 0.8212, "map_at_100": 0.82208, "map_at_1000": 0.82217, "recall_at_1": 0.758, "recall_at_3": 0.864, "recall_at_5": 0.906, "recall_at_10": 0.934, "recall_at_20": 0.954, "recall_at_100": 0.986, "recall_at_1000": 1.0, "precision_at_1": 0.758, "precision_at_3": 0.288, "precision_at_5": 0.1812, "precision_at_10": 0.0934, "precision_at_20": 0.0477, "precision_at_100": 0.00986, "precision_at_1000": 0.001, "mrr_at_1": 0.756, "mrr_at_3": 0.8043333333333331, "mrr_at_5": 0.8149333333333332, "mrr_at_10": 0.8189071428571426, "mrr_at_20": 0.8204020799032408, "mrr_at_100": 0.8212267679558368, "mrr_at_1000": 0.8212914171901723, "naucs_at_1_max": 0.09070812328044268, "naucs_at_1_std": -0.18636623446948097, "naucs_at_1_diff1": 0.8715955720098622, "naucs_at_3_max": 0.16909138817232008, "naucs_at_3_std": -0.04958895147067073, "naucs_at_3_diff1": 0.8514578005834814, "naucs_at_5_max": 0.22705522050098376, "naucs_at_5_std": 0.15102211097204807, "naucs_at_5_diff1": 0.8850586385638123, "naucs_at_10_max": 0.2014048739910066, "naucs_at_10_std": 0.26047590753473066, "naucs_at_10_diff1": 0.9101009072466025, "naucs_at_20_max": 0.13053583605398209, "naucs_at_20_std": 0.32115454877602884, "naucs_at_20_diff1": 0.926967381320062, "naucs_at_100_max": 0.47242002712997266, "naucs_at_100_std": 0.6576630652260723, "naucs_at_100_diff1": 0.9822713895684675, "naucs_at_1000_max": -0.0, "naucs_at_1000_std": 1.0, "naucs_at_1000_diff1": -0.0}, "syntheticDocQA_energy": {"ndcg_at_1": 0.8, "ndcg_at_3": 0.86047, "ndcg_at_5": 0.86865, "ndcg_at_10": 0.87865, "ndcg_at_20": 0.88378, "ndcg_at_100": 0.88942, "ndcg_at_1000": 0.88942, "map_at_1": 0.8, "map_at_3": 0.84667, "map_at_5": 0.85117, "map_at_10": 0.85545, "map_at_20": 0.85689, "map_at_100": 0.85771, "map_at_1000": 0.85771, "recall_at_1": 0.8, "recall_at_3": 0.9, "recall_at_5": 0.92, "recall_at_10": 0.95, "recall_at_20": 0.97, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.8, "precision_at_3": 0.3, "precision_at_5": 0.184, "precision_at_10": 0.095, "precision_at_20": 0.0485, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.81, "mrr_at_3": 0.8533333333333333, "mrr_at_5": 0.8578333333333333, "mrr_at_10": 0.8621190476190477, "mrr_at_20": 0.8637216117216117, "mrr_at_100": 0.8645685814185814, "mrr_at_1000": 0.8645685814185814, "naucs_at_1_max": 0.5412464670054805, "naucs_at_1_std": 0.015859140859140947, "naucs_at_1_diff1": 0.7537132216885911, "naucs_at_3_max": 0.5776779402463085, "naucs_at_3_std": -0.039682539682540215, "naucs_at_3_diff1": 0.6963333035941722, "naucs_at_5_max": 0.5231595168111531, "naucs_at_5_std": -0.1846405228758136, "naucs_at_5_diff1": 0.6743721733633273, "naucs_at_10_max": 0.7189542483660156, "naucs_at_10_std": -0.09458450046684587, "naucs_at_10_diff1": 0.5579450517699609, "naucs_at_20_max": 0.6677559912854047, "naucs_at_20_std": 0.04154995331466144, "naucs_at_20_diff1": 0.40712320327156726, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "syntheticDocQA_healthcare_industry": {"ndcg_at_1": 0.84, "ndcg_at_3": 0.90417, "ndcg_at_5": 0.9119, "ndcg_at_10": 0.92148, "ndcg_at_20": 0.92148, "ndcg_at_100": 0.92148, "ndcg_at_1000": 0.92148, "map_at_1": 0.84, "map_at_3": 0.88833, "map_at_5": 0.89233, "map_at_10": 0.89622, "map_at_20": 0.89622, "map_at_100": 0.89622, "map_at_1000": 0.89622, "recall_at_1": 0.84, "recall_at_3": 0.95, "recall_at_5": 0.97, "recall_at_10": 1.0, "recall_at_20": 1.0, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.84, "precision_at_3": 0.31667, "precision_at_5": 0.194, "precision_at_10": 0.1, "precision_at_20": 0.05, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.86, "mrr_at_3": 0.8983333333333333, "mrr_at_5": 0.9023333333333333, "mrr_at_10": 0.9069166666666668, "mrr_at_20": 0.9069166666666668, "mrr_at_100": 0.9069166666666668, "mrr_at_1000": 0.9069166666666668, "naucs_at_1_max": 0.23650544880887278, "naucs_at_1_std": -0.09815292096219846, "naucs_at_1_diff1": 0.8343088556016764, "naucs_at_3_max": -0.1240046665605388, "naucs_at_3_std": -0.5948646125116709, "naucs_at_3_diff1": 0.7206090754287047, "naucs_at_5_max": 0.12567209949647848, "naucs_at_5_std": -0.5308123249299689, "naucs_at_5_diff1": 0.8191004927952061, "naucs_at_10_max": 1.0, "naucs_at_10_std": 1.0, "naucs_at_10_diff1": 0.0, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 0.0, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "syntheticDocQA_artificial_intelligence_test": {"ndcg_at_1": 0.9, "ndcg_at_3": 0.95547, "ndcg_at_5": 0.95547, "ndcg_at_10": 0.95848, "ndcg_at_20": 0.95848, "ndcg_at_100": 0.95848, "ndcg_at_1000": 0.95848, "map_at_1": 0.9, "map_at_3": 0.94333, "map_at_5": 0.94333, "map_at_10": 0.94444, "map_at_20": 0.94444, "map_at_100": 0.94444, "map_at_1000": 0.94444, "recall_at_1": 0.9, "recall_at_3": 0.99, "recall_at_5": 0.99, "recall_at_10": 1.0, "recall_at_20": 1.0, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.9, "precision_at_3": 0.33, "precision_at_5": 0.198, "precision_at_10": 0.1, "precision_at_20": 0.05, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.92, "mrr_at_3": 0.9533333333333335, "mrr_at_5": 0.9533333333333335, "mrr_at_10": 0.9544444444444445, "mrr_at_20": 0.9544444444444445, "mrr_at_100": 0.9544444444444445, "mrr_at_1000": 0.9544444444444445, "naucs_at_1_max": -0.018224149889179524, "naucs_at_1_std": -0.08870214752567797, "naucs_at_1_diff1": 0.8273781886768067, "naucs_at_3_max": 0.8692810457516159, "naucs_at_3_std": 0.5541549953314585, "naucs_at_3_diff1": 0.8692810457516356, "naucs_at_5_max": 0.8692810457516413, "naucs_at_5_std": 0.5541549953314738, "naucs_at_5_diff1": 0.8692810457516413, "naucs_at_10_max": 1.0, "naucs_at_10_std": 1.0, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 1.0, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "syntheticDocQA_government_reports": {"ndcg_at_1": 0.74, "ndcg_at_3": 0.85333, "ndcg_at_5": 0.86581, "ndcg_at_10": 0.8765, "ndcg_at_20": 0.8792, "ndcg_at_100": 0.8792, "ndcg_at_1000": 0.8792, "map_at_1": 0.74, "map_at_3": 0.82667, "map_at_5": 0.83367, "map_at_10": 0.83867, "map_at_20": 0.8395, "map_at_100": 0.8395, "map_at_1000": 0.8395, "recall_at_1": 0.74, "recall_at_3": 0.93, "recall_at_5": 0.96, "recall_at_10": 0.99, "recall_at_20": 1.0, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.74, "precision_at_3": 0.31, "precision_at_5": 0.192, "precision_at_10": 0.099, "precision_at_20": 0.05, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.75, "mrr_at_3": 0.8333333333333331, "mrr_at_5": 0.8403333333333332, "mrr_at_10": 0.8453333333333332, "mrr_at_20": 0.8462424242424241, "mrr_at_100": 0.8462424242424241, "mrr_at_1000": 0.8462424242424241, "naucs_at_1_max": 0.19333798979876674, "naucs_at_1_std": -0.16738980576618956, "naucs_at_1_diff1": 0.7124868111210353, "naucs_at_3_max": 0.6455055797481293, "naucs_at_3_std": -0.20288115246098487, "naucs_at_3_diff1": 0.577849691523067, "naucs_at_5_max": 0.5715378767452046, "naucs_at_5_std": -0.0402661064425746, "naucs_at_5_diff1": 0.3629072579140961, "naucs_at_10_max": 0.8692810457516413, "naucs_at_10_std": 1.0, "naucs_at_10_diff1": 0.869281045751625, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 0.0, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "infovqa_subsampled": {"ndcg_at_1": 0.752, "ndcg_at_3": 0.80847, "ndcg_at_5": 0.82423, "ndcg_at_10": 0.83687, "ndcg_at_20": 0.84149, "ndcg_at_100": 0.84739, "ndcg_at_1000": 0.85109, "map_at_1": 0.752, "map_at_3": 0.796, "map_at_5": 0.8048, "map_at_10": 0.80985, "map_at_20": 0.81115, "map_at_100": 0.81194, "map_at_1000": 0.8121, "recall_at_1": 0.752, "recall_at_3": 0.844, "recall_at_5": 0.882, "recall_at_10": 0.922, "recall_at_20": 0.94, "recall_at_100": 0.972, "recall_at_1000": 1.0, "precision_at_1": 0.752, "precision_at_3": 0.28133, "precision_at_5": 0.1764, "precision_at_10": 0.0922, "precision_at_20": 0.047, "precision_at_100": 0.00972, "precision_at_1000": 0.001, "mrr_at_1": 0.752, "mrr_at_3": 0.7966666666666665, "mrr_at_5": 0.8044666666666664, "mrr_at_10": 0.8096928571428572, "mrr_at_20": 0.8109839466089466, "mrr_at_100": 0.811791112567069, "mrr_at_1000": 0.8119483879813215, "naucs_at_1_max": 0.401354434669786, "naucs_at_1_std": 0.0010491808804686878, "naucs_at_1_diff1": 0.8240401560855564, "naucs_at_3_max": 0.49084514178984795, "naucs_at_3_std": 0.08193498957522397, "naucs_at_3_diff1": 0.8042663360745878, "naucs_at_5_max": 0.5628244129345925, "naucs_at_5_std": 0.18358927158520252, "naucs_at_5_diff1": 0.7781499586766933, "naucs_at_10_max": 0.6770719495217774, "naucs_at_10_std": 0.33144197850080187, "naucs_at_10_diff1": 0.7826336820469839, "naucs_at_20_max": 0.7396021003656751, "naucs_at_20_std": 0.4750855897914696, "naucs_at_20_diff1": 0.7877345907526897, "naucs_at_100_max": 0.8143611118848274, "naucs_at_100_std": 0.6599639855942331, "naucs_at_100_diff1": 0.7663405103249586, "naucs_at_1000_max": 1.0, "naucs_at_1000_std": 1.0, "naucs_at_1000_diff1": 1.0}, "docvqa_subsampled": {"ndcg_at_1": 0.394, "ndcg_at_3": 0.45764, "ndcg_at_5": 0.48028, "ndcg_at_10": 0.5058, "ndcg_at_20": 0.51993, "ndcg_at_100": 0.54377, "ndcg_at_1000": 0.56637, "map_at_1": 0.394, "map_at_3": 0.44167, "map_at_5": 0.45447, "map_at_10": 0.4648, "map_at_20": 0.46866, "map_at_100": 0.47201, "map_at_1000": 0.47282, "recall_at_1": 0.394, "recall_at_3": 0.504, "recall_at_5": 0.558, "recall_at_10": 0.638, "recall_at_20": 0.694, "recall_at_100": 0.822, "recall_at_1000": 1.0, "precision_at_1": 0.394, "precision_at_3": 0.168, "precision_at_5": 0.1116, "precision_at_10": 0.0638, "precision_at_20": 0.0347, "precision_at_100": 0.00822, "precision_at_1000": 0.001, "mrr_at_1": 0.386, "mrr_at_3": 0.4386666666666668, "mrr_at_5": 0.45246666666666674, "mrr_at_10": 0.461768253968254, "mrr_at_20": 0.4657159871088663, "mrr_at_100": 0.46914092028832916, "mrr_at_1000": 0.46986149736935945, "naucs_at_1_max": 0.3318629744408341, "naucs_at_1_std": 0.6669248174043203, "naucs_at_1_diff1": 0.7346870104004866, "naucs_at_3_max": 0.26294857917871794, "naucs_at_3_std": 0.7376183769412721, "naucs_at_3_diff1": 0.6398024921713373, "naucs_at_5_max": 0.2299847172467665, "naucs_at_5_std": 0.7741292002921725, "naucs_at_5_diff1": 0.5822162492438375, "naucs_at_10_max": 0.15927618266246538, "naucs_at_10_std": 0.8134476032767242, "naucs_at_10_diff1": 0.5606666326910011, "naucs_at_20_max": 0.12411863967319646, "naucs_at_20_std": 0.8409420157794703, "naucs_at_20_diff1": 0.5344358808968045, "naucs_at_100_max": -0.026062589079265428, "naucs_at_100_std": 0.8537989635351115, "naucs_at_100_diff1": 0.44933419288088144, "naucs_at_1000_max": 1.0, "naucs_at_1000_std": 1.0, "naucs_at_1000_diff1": 1.0}, "arxivqa_subsampled": {"ndcg_at_1": 0.66, "ndcg_at_3": 0.7239, "ndcg_at_5": 0.73639, "ndcg_at_10": 0.76075, "ndcg_at_20": 0.7709, "ndcg_at_100": 0.78437, "ndcg_at_1000": 0.78631, "map_at_1": 0.66, "map_at_3": 0.708, "map_at_5": 0.715, "map_at_10": 0.72532, "map_at_20": 0.72812, "map_at_100": 0.73001, "map_at_1000": 0.7301, "recall_at_1": 0.66, "recall_at_3": 0.77, "recall_at_5": 0.8, "recall_at_10": 0.874, "recall_at_20": 0.914, "recall_at_100": 0.986, "recall_at_1000": 1.0, "precision_at_1": 0.66, "precision_at_3": 0.25667, "precision_at_5": 0.16, "precision_at_10": 0.0874, "precision_at_20": 0.0457, "precision_at_100": 0.00986, "precision_at_1000": 0.001, "mrr_at_1": 0.664, "mrr_at_3": 0.7093333333333333, "mrr_at_5": 0.7168333333333331, "mrr_at_10": 0.7270269841269841, "mrr_at_20": 0.7297596328508092, "mrr_at_100": 0.7319036103695236, "mrr_at_1000": 0.7319989049552974, "naucs_at_1_max": 0.49633256748701654, "naucs_at_1_std": 0.015168110458361991, "naucs_at_1_diff1": 0.8455697411134462, "naucs_at_3_max": 0.5620320392333852, "naucs_at_3_std": 0.08454600389427307, "naucs_at_3_diff1": 0.8026018506222728, "naucs_at_5_max": 0.583298496441478, "naucs_at_5_std": 0.11286213786213735, "naucs_at_5_diff1": 0.7831582929045984, "naucs_at_10_max": 0.6562088873755272, "naucs_at_10_std": 0.14066379297214054, "naucs_at_10_diff1": 0.7666117950661796, "naucs_at_20_max": 0.7169818694056063, "naucs_at_20_std": 0.23062558356675675, "naucs_at_20_diff1": 0.7555430302157831, "naucs_at_100_max": 0.960411256418003, "naucs_at_100_std": 0.539148992930495, "naucs_at_100_diff1": 0.7231987344748477, "naucs_at_1000_max": 1.0, "naucs_at_1000_std": 1.0, "naucs_at_1000_diff1": 1.0}, "tabfquad_subsampled": {"ndcg_at_1": 0.725, "ndcg_at_3": 0.80463, "ndcg_at_5": 0.81647, "ndcg_at_10": 0.83315, "ndcg_at_20": 0.84207, "ndcg_at_100": 0.84713, "ndcg_at_1000": 0.84713, "map_at_1": 0.725, "map_at_3": 0.78631, "map_at_5": 0.79292, "map_at_10": 0.79943, "map_at_20": 0.80183, "map_at_100": 0.80268, "map_at_1000": 0.80268, "recall_at_1": 0.725, "recall_at_3": 0.85714, "recall_at_5": 0.88571, "recall_at_10": 0.93929, "recall_at_20": 0.975, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.725, "precision_at_3": 0.28571, "precision_at_5": 0.17714, "precision_at_10": 0.09393, "precision_at_20": 0.04875, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.7285714285714285, "mrr_at_3": 0.7886904761904763, "mrr_at_5": 0.7954761904761904, "mrr_at_10": 0.8015107709750566, "mrr_at_20": 0.8040490476493627, "mrr_at_100": 0.8050514494124057, "mrr_at_1000": 0.8050514494124057, "naucs_at_1_max": 0.3082396765618701, "naucs_at_1_std": 0.13916417125982944, "naucs_at_1_diff1": 0.7871775290829601, "naucs_at_3_max": 0.4708417670390566, "naucs_at_3_std": 0.3116253644314879, "naucs_at_3_diff1": 0.7135374718532007, "naucs_at_5_max": 0.4058226860726871, "naucs_at_5_std": 0.2683614143331763, "naucs_at_5_diff1": 0.6703666772448862, "naucs_at_10_max": 0.36817364636125977, "naucs_at_10_std": 0.2994178063382223, "naucs_at_10_diff1": 0.5978772328046428, "naucs_at_20_max": 0.5890449777669317, "naucs_at_20_std": 0.6119114312391624, "naucs_at_20_diff1": 0.3887797762117147, "naucs_at_100_max": -0.0, "naucs_at_100_std": 1.0, "naucs_at_100_diff1": -0.0, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "tatdqa": {"ndcg_at_1": 0.47745, "ndcg_at_3": 0.59847, "ndcg_at_5": 0.62993, "ndcg_at_10": 0.66499, "ndcg_at_20": 0.67988, "ndcg_at_100": 0.68951, "ndcg_at_1000": 0.69203, "map_at_1": 0.47745, "map_at_3": 0.56825, "map_at_5": 0.58557, "map_at_10": 0.59995, "map_at_20": 0.60411, "map_at_100": 0.60547, "map_at_1000": 0.6056, "recall_at_1": 0.47745, "recall_at_3": 0.68611, "recall_at_5": 0.76308, "recall_at_10": 0.87192, "recall_at_20": 0.93025, "recall_at_100": 0.98196, "recall_at_1000": 1.0, "precision_at_1": 0.47745, "precision_at_3": 0.2287, "precision_at_5": 0.15262, "precision_at_10": 0.08719, "precision_at_20": 0.04651, "precision_at_100": 0.00982, "precision_at_1000": 0.001, "mrr_at_1": 0.47805171377029465, "mrr_at_3": 0.56784926839046, "mrr_at_5": 0.5862798155943094, "mrr_at_10": 0.6004106653685739, "mrr_at_20": 0.60466289702996, "mrr_at_100": 0.6058893789422248, "mrr_at_1000": 0.6060269903186414, "naucs_at_1_max": 0.15083203147100246, "naucs_at_1_std": -0.04802302828173909, "naucs_at_1_diff1": 0.6211565964174599, "naucs_at_3_max": 0.18246362774196792, "naucs_at_3_std": 0.020235523965394693, "naucs_at_3_diff1": 0.4881827797009872, "naucs_at_5_max": 0.16998681876320373, "naucs_at_5_std": 0.017808906249241557, "naucs_at_5_diff1": 0.479659535540913, "naucs_at_10_max": 0.20122875990012343, "naucs_at_10_std": 0.08339049970662891, "naucs_at_10_diff1": 0.48514372908390346, "naucs_at_20_max": 0.37483353821055365, "naucs_at_20_std": 0.28851875825709916, "naucs_at_20_diff1": 0.5090533405120935, "naucs_at_100_max": 0.6314194263317944, "naucs_at_100_std": 0.5976344416036061, "naucs_at_100_diff1": 0.6795642527577944, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "shift_project": {"ndcg_at_1": 0.36, "ndcg_at_3": 0.53226, "ndcg_at_5": 0.57226, "ndcg_at_10": 0.5934, "ndcg_at_20": 0.61646, "ndcg_at_100": 0.62934, "ndcg_at_1000": 0.63083, "map_at_1": 0.36, "map_at_3": 0.48833, "map_at_5": 0.50983, "map_at_10": 0.51769, "map_at_20": 0.52417, "map_at_100": 0.52589, "map_at_1000": 0.52599, "recall_at_1": 0.36, "recall_at_3": 0.66, "recall_at_5": 0.76, "recall_at_10": 0.83, "recall_at_20": 0.92, "recall_at_100": 0.99, "recall_at_1000": 1.0, "precision_at_1": 0.36, "precision_at_3": 0.22, "precision_at_5": 0.152, "precision_at_10": 0.083, "precision_at_20": 0.046, "precision_at_100": 0.0099, "precision_at_1000": 0.001, "mrr_at_1": 0.38, "mrr_at_3": 0.5066666666666666, "mrr_at_5": 0.5281666666666667, "mrr_at_10": 0.5369285714285715, "mrr_at_20": 0.5422229437229439, "mrr_at_100": 0.5441099386724387, "mrr_at_1000": 0.5442060925185926, "naucs_at_1_max": -0.05930838047189907, "naucs_at_1_std": -0.15656327087597757, "naucs_at_1_diff1": 0.4368112963614647, "naucs_at_3_max": -0.01762658919840709, "naucs_at_3_std": -0.11787646453582858, "naucs_at_3_diff1": 0.40371351806708616, "naucs_at_5_max": 0.27222579290681453, "naucs_at_5_std": 0.02100458596521666, "naucs_at_5_diff1": 0.5338907890527974, "naucs_at_10_max": 0.47491264594411636, "naucs_at_10_std": 0.19814825564520944, "naucs_at_10_diff1": 0.6138466670584348, "naucs_at_20_max": 0.2852743345442196, "naucs_at_20_std": 0.09984827264239152, "naucs_at_20_diff1": 0.6238072324160416, "naucs_at_100_max": -0.16753158468153767, "naucs_at_100_std": -0.17133520074697067, "naucs_at_100_diff1": 0.5793801391524361, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}}
|
special_tokens_map.json
ADDED
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"additional_special_tokens": [
|
3 |
+
{
|
4 |
+
"content": "<fake_token_around_image>",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false
|
9 |
+
},
|
10 |
+
{
|
11 |
+
"content": "<image>",
|
12 |
+
"lstrip": false,
|
13 |
+
"normalized": false,
|
14 |
+
"rstrip": false,
|
15 |
+
"single_word": false
|
16 |
+
},
|
17 |
+
{
|
18 |
+
"content": "<end_of_utterance>",
|
19 |
+
"lstrip": false,
|
20 |
+
"normalized": false,
|
21 |
+
"rstrip": false,
|
22 |
+
"single_word": false
|
23 |
+
}
|
24 |
+
],
|
25 |
+
"bos_token": {
|
26 |
+
"content": "<s>",
|
27 |
+
"lstrip": false,
|
28 |
+
"normalized": false,
|
29 |
+
"rstrip": false,
|
30 |
+
"single_word": false
|
31 |
+
},
|
32 |
+
"eos_token": {
|
33 |
+
"content": "</s>",
|
34 |
+
"lstrip": false,
|
35 |
+
"normalized": false,
|
36 |
+
"rstrip": false,
|
37 |
+
"single_word": false
|
38 |
+
},
|
39 |
+
"pad_token": {
|
40 |
+
"content": "<unk>",
|
41 |
+
"lstrip": false,
|
42 |
+
"normalized": false,
|
43 |
+
"rstrip": false,
|
44 |
+
"single_word": false
|
45 |
+
},
|
46 |
+
"unk_token": {
|
47 |
+
"content": "<unk>",
|
48 |
+
"lstrip": false,
|
49 |
+
"normalized": false,
|
50 |
+
"rstrip": false,
|
51 |
+
"single_word": false
|
52 |
+
}
|
53 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
|
3 |
+
size 493443
|
tokenizer_config.json
ADDED
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"add_bos_token": true,
|
3 |
+
"add_eos_token": false,
|
4 |
+
"added_tokens_decoder": {
|
5 |
+
"0": {
|
6 |
+
"content": "<unk>",
|
7 |
+
"lstrip": false,
|
8 |
+
"normalized": false,
|
9 |
+
"rstrip": false,
|
10 |
+
"single_word": false,
|
11 |
+
"special": true
|
12 |
+
},
|
13 |
+
"1": {
|
14 |
+
"content": "<s>",
|
15 |
+
"lstrip": false,
|
16 |
+
"normalized": false,
|
17 |
+
"rstrip": false,
|
18 |
+
"single_word": false,
|
19 |
+
"special": true
|
20 |
+
},
|
21 |
+
"2": {
|
22 |
+
"content": "</s>",
|
23 |
+
"lstrip": false,
|
24 |
+
"normalized": false,
|
25 |
+
"rstrip": false,
|
26 |
+
"single_word": false,
|
27 |
+
"special": true
|
28 |
+
},
|
29 |
+
"32000": {
|
30 |
+
"content": "<fake_token_around_image>",
|
31 |
+
"lstrip": false,
|
32 |
+
"normalized": false,
|
33 |
+
"rstrip": false,
|
34 |
+
"single_word": false,
|
35 |
+
"special": true
|
36 |
+
},
|
37 |
+
"32001": {
|
38 |
+
"content": "<image>",
|
39 |
+
"lstrip": false,
|
40 |
+
"normalized": false,
|
41 |
+
"rstrip": false,
|
42 |
+
"single_word": false,
|
43 |
+
"special": true
|
44 |
+
},
|
45 |
+
"32002": {
|
46 |
+
"content": "<end_of_utterance>",
|
47 |
+
"lstrip": false,
|
48 |
+
"normalized": false,
|
49 |
+
"rstrip": false,
|
50 |
+
"single_word": false,
|
51 |
+
"special": true
|
52 |
+
}
|
53 |
+
},
|
54 |
+
"additional_special_tokens": [
|
55 |
+
"<fake_token_around_image>",
|
56 |
+
"<image>",
|
57 |
+
"<end_of_utterance>"
|
58 |
+
],
|
59 |
+
"bos_token": "<s>",
|
60 |
+
"clean_up_tokenization_spaces": false,
|
61 |
+
"do_image_splitting": false,
|
62 |
+
"eos_token": "</s>",
|
63 |
+
"legacy": false,
|
64 |
+
"model_max_length": 1000000000000000019884624838656,
|
65 |
+
"pad_token": "<unk>",
|
66 |
+
"processor_class": "Idefics2Processor",
|
67 |
+
"sp_model_kwargs": {},
|
68 |
+
"spaces_between_special_tokens": false,
|
69 |
+
"tokenizer_class": "LlamaTokenizer",
|
70 |
+
"unk_token": "<unk>",
|
71 |
+
"use_default_system_prompt": true
|
72 |
+
}
|
training_config.yml
ADDED
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
config:
|
2 |
+
(): custom_colbert.utils.train_custom_colbert_models.ColModelTrainingConfig
|
3 |
+
output_dir: !path ../../../models/without_tabfquad/train_colidefics2-60-longrun
|
4 |
+
processor:
|
5 |
+
() : custom_colbert.utils.wrapper.AutoProcessorWrapper
|
6 |
+
pretrained_model_name_or_path: "./models/idefics2-8b"
|
7 |
+
do_image_splitting: false
|
8 |
+
model:
|
9 |
+
(): custom_colbert.utils.wrapper.AutoColModelWrapper
|
10 |
+
pretrained_model_name_or_path: "./models/idefics2-8b"
|
11 |
+
training_objective: "colbertv1"
|
12 |
+
# attn_implementation: "eager"
|
13 |
+
torch_dtype: !ext torch.bfloat16
|
14 |
+
# device_map: "auto"
|
15 |
+
# quantization_config:
|
16 |
+
# (): transformers.BitsAndBytesConfig
|
17 |
+
# load_in_4bit: true
|
18 |
+
# bnb_4bit_quant_type: "nf4"
|
19 |
+
# bnb_4bit_compute_dtype: "bfloat16"
|
20 |
+
# bnb_4bit_use_double_quant: true
|
21 |
+
|
22 |
+
dataset_loading_func: !ext custom_colbert.utils.dataset_transformation.load_train_set
|
23 |
+
eval_dataset_loader: !import ../data/test_data.yaml
|
24 |
+
|
25 |
+
max_length: 50
|
26 |
+
run_eval: true
|
27 |
+
add_suffix: true
|
28 |
+
loss_func:
|
29 |
+
(): custom_colbert.loss.colbert_loss.ColbertPairwiseCELoss
|
30 |
+
tr_args: !import ../tr_args/default_tr_args.yaml
|
31 |
+
peft_config:
|
32 |
+
(): peft.LoraConfig
|
33 |
+
r: 32
|
34 |
+
lora_alpha: 32
|
35 |
+
lora_dropout: 0.1
|
36 |
+
init_lora_weights: "gaussian"
|
37 |
+
bias: "none"
|
38 |
+
task_type: "FEATURE_EXTRACTION"
|
39 |
+
target_modules: '.*(text_model).*(down_proj|gate_proj|up_proj|k_proj|q_proj|v_proj|o_proj).*$'
|
40 |
+
# target_modules: '(.*(language_model).*(down_proj|gate_proj|up_proj|k_proj|q_proj|v_proj|o_proj).*$|.*(custom_text_proj).*$)'
|
41 |
+
|