Upload folder using huggingface_hub
Browse files
- .gitattributes +1 -0
- adapter_config.json +26 -0
- adapter_model.safetensors +3 -0
- git_hash.txt +1 -0
- preprocessor_config.json +40 -0
- results.json +1 -0
- special_tokens_map.json +39 -0
- tokenizer.json +3 -0
- tokenizer_config.json +0 -0
- training_config.yml +41 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
adapter_config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"alpha_pattern": {},
|
3 |
+
"auto_mapping": null,
|
4 |
+
"base_model_name_or_path": "./models/paligemma-3b-pt-448",
|
5 |
+
"bias": "none",
|
6 |
+
"fan_in_fan_out": false,
|
7 |
+
"inference_mode": false,
|
8 |
+
"init_lora_weights": "gaussian",
|
9 |
+
"layer_replication": null,
|
10 |
+
"layers_pattern": null,
|
11 |
+
"layers_to_transform": null,
|
12 |
+
"loftq_config": {},
|
13 |
+
"lora_alpha": 32,
|
14 |
+
"lora_dropout": 0.1,
|
15 |
+
"megatron_config": null,
|
16 |
+
"megatron_core": "megatron.core",
|
17 |
+
"modules_to_save": null,
|
18 |
+
"peft_type": "LORA",
|
19 |
+
"r": 32,
|
20 |
+
"rank_pattern": {},
|
21 |
+
"revision": null,
|
22 |
+
"target_modules": "(.*(language_model).*(down_proj|gate_proj|up_proj|k_proj|q_proj|v_proj|o_proj).*$|.*(custom_text_proj).*$)",
|
23 |
+
"task_type": "FEATURE_EXTRACTION",
|
24 |
+
"use_dora": false,
|
25 |
+
"use_rslora": false
|
26 |
+
}
|
adapter_model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:869494fb8a1a227a147ca5bfccc3cc685b8f09b22b069f6ce9b25df4344bcf58
|
3 |
+
size 78625112
|
git_hash.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
a8e91629dccba768a591c3dba1450860bd2ef76e
|
preprocessor_config.json
ADDED
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_valid_processor_keys": [
|
3 |
+
"images",
|
4 |
+
"do_resize",
|
5 |
+
"size",
|
6 |
+
"resample",
|
7 |
+
"do_rescale",
|
8 |
+
"rescale_factor",
|
9 |
+
"do_normalize",
|
10 |
+
"image_mean",
|
11 |
+
"image_std",
|
12 |
+
"return_tensors",
|
13 |
+
"data_format",
|
14 |
+
"input_data_format",
|
15 |
+
"do_convert_rgb"
|
16 |
+
],
|
17 |
+
"do_convert_rgb": null,
|
18 |
+
"do_normalize": true,
|
19 |
+
"do_rescale": true,
|
20 |
+
"do_resize": true,
|
21 |
+
"image_mean": [
|
22 |
+
0.5,
|
23 |
+
0.5,
|
24 |
+
0.5
|
25 |
+
],
|
26 |
+
"image_processor_type": "SiglipImageProcessor",
|
27 |
+
"image_seq_length": 1024,
|
28 |
+
"image_std": [
|
29 |
+
0.5,
|
30 |
+
0.5,
|
31 |
+
0.5
|
32 |
+
],
|
33 |
+
"processor_class": "PaliGemmaProcessor",
|
34 |
+
"resample": 3,
|
35 |
+
"rescale_factor": 0.00392156862745098,
|
36 |
+
"size": {
|
37 |
+
"height": 448,
|
38 |
+
"width": 448
|
39 |
+
}
|
40 |
+
}
|
results.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"validation_set": {"ndcg_at_1": 0.782, "ndcg_at_3": 0.82802, "ndcg_at_5": 0.83947, "ndcg_at_10": 0.85198, "ndcg_at_20": 0.85902, "ndcg_at_100": 0.86506, "ndcg_at_1000": 0.86667, "map_at_1": 0.782, "map_at_3": 0.81633, "map_at_5": 0.82263, "map_at_10": 0.82793, "map_at_20": 0.82984, "map_at_100": 0.83071, "map_at_1000": 0.83078, "recall_at_1": 0.782, "recall_at_3": 0.862, "recall_at_5": 0.89, "recall_at_10": 0.928, "recall_at_20": 0.956, "recall_at_100": 0.988, "recall_at_1000": 1.0, "precision_at_1": 0.782, "precision_at_3": 0.28733, "precision_at_5": 0.178, "precision_at_10": 0.0928, "precision_at_20": 0.0478, "precision_at_100": 0.00988, "precision_at_1000": 0.001, "mrr_at_1": 0.78, "mrr_at_3": 0.8156666666666667, "mrr_at_5": 0.8222666666666666, "mrr_at_10": 0.8277817460317461, "mrr_at_20": 0.829524681690858, "mrr_at_100": 0.8304143601302392, "mrr_at_1000": 0.830459459335204, "naucs_at_1_max": 0.05817217206932394, "naucs_at_1_std": 0.060016906170751665, "naucs_at_1_diff1": 0.9037095501183898, "naucs_at_3_max": 0.09049116354805792, "naucs_at_3_std": 0.1868639368885034, "naucs_at_3_diff1": 0.8896741953143658, "naucs_at_5_max": 0.09589382355467566, "naucs_at_5_std": 0.22617472725710755, "naucs_at_5_diff1": 0.8831801391633034, "naucs_at_10_max": 0.14739599543520784, "naucs_at_10_std": 0.39555451810353637, "naucs_at_10_diff1": 0.8772045855379201, "naucs_at_20_max": 0.08587980646803786, "naucs_at_20_std": 0.622145828028182, "naucs_at_20_diff1": 0.9097063067651333, "naucs_at_100_max": 0.05983504512917692, "naucs_at_100_std": 0.9782135076252826, "naucs_at_100_diff1": 0.8576097105508842, "naucs_at_1000_max": 1.0, "naucs_at_1000_std": 1.0, "naucs_at_1000_diff1": 1.0}, "syntheticDocQA_energy": {"ndcg_at_1": 0.89, "ndcg_at_3": 0.92655, "ndcg_at_5": 0.93085, "ndcg_at_10": 0.93442, "ndcg_at_20": 0.93681, "ndcg_at_100": 0.94046, "ndcg_at_1000": 0.94046, "map_at_1": 0.89, "map_at_3": 0.91833, "map_at_5": 0.92083, "map_at_10": 0.9225, "map_at_20": 0.92309, "map_at_100": 0.92355, 
"map_at_1000": 0.92355, "recall_at_1": 0.89, "recall_at_3": 0.95, "recall_at_5": 0.96, "recall_at_10": 0.97, "recall_at_20": 0.98, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.89, "precision_at_3": 0.31667, "precision_at_5": 0.192, "precision_at_10": 0.097, "precision_at_20": 0.049, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.92, "mrr_at_3": 0.9366666666666668, "mrr_at_5": 0.9366666666666668, "mrr_at_10": 0.9383333333333335, "mrr_at_20": 0.9389215686274511, "mrr_at_100": 0.9393993464052288, "mrr_at_1000": 0.9393993464052288, "naucs_at_1_max": 0.21029121209518126, "naucs_at_1_std": -0.07052658706296669, "naucs_at_1_diff1": 0.9518941671677688, "naucs_at_3_max": 0.19813258636788023, "naucs_at_3_std": -0.8183940242763749, "naucs_at_3_diff1": 0.8954248366013042, "naucs_at_5_max": 0.5355975723622793, "naucs_at_5_std": -0.5880018674136268, "naucs_at_5_diff1": 0.9019607843137248, "naucs_at_10_max": 0.473389355742298, "naucs_at_10_std": -0.5961718020541553, "naucs_at_10_diff1": 0.9128540305010848, "naucs_at_20_max": 0.3489729225023391, "naucs_at_20_std": -0.024276377217554025, "naucs_at_20_diff1": 0.9346405228758136, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "syntheticDocQA_healthcare_industry": {"ndcg_at_1": 0.93, "ndcg_at_3": 0.95893, "ndcg_at_5": 0.96323, "ndcg_at_10": 0.96323, "ndcg_at_20": 0.96323, "ndcg_at_100": 0.96496, "ndcg_at_1000": 0.96496, "map_at_1": 0.93, "map_at_3": 0.95167, "map_at_5": 0.95417, "map_at_10": 0.95417, "map_at_20": 0.95417, "map_at_100": 0.95435, "map_at_1000": 0.95435, "recall_at_1": 0.93, "recall_at_3": 0.98, "recall_at_5": 0.99, "recall_at_10": 0.99, "recall_at_20": 0.99, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.93, "precision_at_3": 0.32667, "precision_at_5": 0.198, "precision_at_10": 0.099, "precision_at_20": 0.0495, "precision_at_100": 0.01, 
"precision_at_1000": 0.001, "mrr_at_1": 0.94, "mrr_at_3": 0.96, "mrr_at_5": 0.96, "mrr_at_10": 0.96, "mrr_at_20": 0.96, "mrr_at_100": 0.9601851851851851, "mrr_at_1000": 0.9601851851851851, "naucs_at_1_max": 0.6399226357209566, "naucs_at_1_std": -0.26217153528077874, "naucs_at_1_diff1": 0.9253034547152207, "naucs_at_3_max": 0.9346405228758099, "naucs_at_3_std": -0.5929038281979536, "naucs_at_3_diff1": 0.9346405228758099, "naucs_at_5_max": 0.8692810457516413, "naucs_at_5_std": -1.7399626517273863, "naucs_at_5_diff1": 1.0, "naucs_at_10_max": 0.8692810457516413, "naucs_at_10_std": -1.7399626517273863, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 0.8692810457516413, "naucs_at_20_std": -1.7399626517273863, "naucs_at_20_diff1": 1.0, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "syntheticDocQA_artificial_intelligence_test": {"ndcg_at_1": 0.95, "ndcg_at_3": 0.97524, "ndcg_at_5": 0.97524, "ndcg_at_10": 0.97524, "ndcg_at_20": 0.97524, "ndcg_at_100": 0.97736, "ndcg_at_1000": 0.97736, "map_at_1": 0.95, "map_at_3": 0.97, "map_at_5": 0.97, "map_at_10": 0.97, "map_at_20": 0.97, "map_at_100": 0.9704, "map_at_1000": 0.9704, "recall_at_1": 0.95, "recall_at_3": 0.99, "recall_at_5": 0.99, "recall_at_10": 0.99, "recall_at_20": 0.99, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.95, "precision_at_3": 0.33, "precision_at_5": 0.198, "precision_at_10": 0.099, "precision_at_20": 0.0495, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.95, "mrr_at_3": 0.97, "mrr_at_5": 0.97, "mrr_at_10": 0.97, "mrr_at_20": 0.97, "mrr_at_100": 0.9704166666666666, "mrr_at_1000": 0.9704166666666666, "naucs_at_1_max": -0.05901027077497799, "naucs_at_1_std": -1.1517273576097071, "naucs_at_1_diff1": 0.9183006535947691, "naucs_at_3_max": -1.1517273576097802, "naucs_at_3_std": -1.7399626517274398, "naucs_at_3_diff1": 0.8692810457516356, "naucs_at_5_max": 
-1.1517273576097316, "naucs_at_5_std": -1.7399626517273863, "naucs_at_5_diff1": 0.8692810457516413, "naucs_at_10_max": -1.1517273576097316, "naucs_at_10_std": -1.7399626517273863, "naucs_at_10_diff1": 0.8692810457516413, "naucs_at_20_max": -1.1517273576097316, "naucs_at_20_std": -1.7399626517273863, "naucs_at_20_diff1": 0.8692810457516413, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "syntheticDocQA_government_reports": {"ndcg_at_1": 0.89, "ndcg_at_3": 0.93917, "ndcg_at_5": 0.93917, "ndcg_at_10": 0.94583, "ndcg_at_20": 0.94862, "ndcg_at_100": 0.94862, "ndcg_at_1000": 0.94862, "map_at_1": 0.89, "map_at_3": 0.92833, "map_at_5": 0.92833, "map_at_10": 0.93119, "map_at_20": 0.9321, "map_at_100": 0.9321, "map_at_1000": 0.9321, "recall_at_1": 0.89, "recall_at_3": 0.97, "recall_at_5": 0.97, "recall_at_10": 0.99, "recall_at_20": 1.0, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.89, "precision_at_3": 0.32333, "precision_at_5": 0.194, "precision_at_10": 0.099, "precision_at_20": 0.05, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.91, "mrr_at_3": 0.9383333333333332, "mrr_at_5": 0.9403333333333334, "mrr_at_10": 0.9420000000000001, "mrr_at_20": 0.9429090909090909, "mrr_at_100": 0.9429090909090909, "mrr_at_1000": 0.9429090909090909, "naucs_at_1_max": 0.04282278154797617, "naucs_at_1_std": 0.1072502362339995, "naucs_at_1_diff1": 0.8146636886865386, "naucs_at_3_max": 0.43526299408652347, "naucs_at_3_std": 0.04154995331465614, "naucs_at_3_diff1": 0.9128540305010931, "naucs_at_5_max": 0.4352629940865253, "naucs_at_5_std": 0.04154995331466144, "naucs_at_5_diff1": 0.9128540305010848, "naucs_at_10_max": 0.8692810457516413, "naucs_at_10_std": -1.1517273576097316, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 1.0, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": 
NaN, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "infovqa_subsampled": {"ndcg_at_1": 0.744, "ndcg_at_3": 0.80847, "ndcg_at_5": 0.82087, "ndcg_at_10": 0.82906, "ndcg_at_20": 0.83358, "ndcg_at_100": 0.84315, "ndcg_at_1000": 0.84589, "map_at_1": 0.744, "map_at_3": 0.79333, "map_at_5": 0.80023, "map_at_10": 0.80348, "map_at_20": 0.8047, "map_at_100": 0.806, "map_at_1000": 0.80609, "recall_at_1": 0.744, "recall_at_3": 0.852, "recall_at_5": 0.882, "recall_at_10": 0.908, "recall_at_20": 0.926, "recall_at_100": 0.978, "recall_at_1000": 1.0, "precision_at_1": 0.744, "precision_at_3": 0.284, "precision_at_5": 0.1764, "precision_at_10": 0.0908, "precision_at_20": 0.0463, "precision_at_100": 0.00978, "precision_at_1000": 0.001, "mrr_at_1": 0.744, "mrr_at_3": 0.7936666666666666, "mrr_at_5": 0.8009666666666665, "mrr_at_10": 0.8037817460317458, "mrr_at_20": 0.8048973748473746, "mrr_at_100": 0.8062602887117994, "mrr_at_1000": 0.8063694924985594, "naucs_at_1_max": 0.24431248870710812, "naucs_at_1_std": 0.07172834639202393, "naucs_at_1_diff1": 0.8964996139884034, "naucs_at_3_max": 0.21414077529185624, "naucs_at_3_std": 0.11988431233035729, "naucs_at_3_diff1": 0.8659050781352937, "naucs_at_5_max": 0.21414846033360693, "naucs_at_5_std": 0.293045261509147, "naucs_at_5_diff1": 0.8451533207382654, "naucs_at_10_max": 0.3168615272195837, "naucs_at_10_std": 0.33820484715625165, "naucs_at_10_diff1": 0.8003795721187013, "naucs_at_20_max": 0.350707850707853, "naucs_at_20_std": 0.5118479824362165, "naucs_at_20_diff1": 0.793461528755648, "naucs_at_100_max": 0.3314659197012103, "naucs_at_100_std": 0.8232323232323228, "naucs_at_100_diff1": 0.80561921738392, "naucs_at_1000_max": 1.0, "naucs_at_1000_std": 1.0, "naucs_at_1000_diff1": 1.0}, "docvqa_subsampled": {"ndcg_at_1": 0.448, "ndcg_at_3": 0.51595, "ndcg_at_5": 0.53729, "ndcg_at_10": 0.55552, "ndcg_at_20": 0.57175, "ndcg_at_100": 0.59524, "ndcg_at_1000": 0.61246, "map_at_1": 0.448, "map_at_3": 0.49933, 
"map_at_5": 0.51113, "map_at_10": 0.51872, "map_at_20": 0.52319, "map_at_100": 0.5262, "map_at_1000": 0.52686, "recall_at_1": 0.448, "recall_at_3": 0.564, "recall_at_5": 0.616, "recall_at_10": 0.672, "recall_at_20": 0.736, "recall_at_100": 0.866, "recall_at_1000": 1.0, "precision_at_1": 0.448, "precision_at_3": 0.188, "precision_at_5": 0.1232, "precision_at_10": 0.0672, "precision_at_20": 0.0368, "precision_at_100": 0.00866, "precision_at_1000": 0.001, "mrr_at_1": 0.438, "mrr_at_3": 0.49433333333333357, "mrr_at_5": 0.5050333333333334, "mrr_at_10": 0.5120126984126985, "mrr_at_20": 0.5164523609946985, "mrr_at_100": 0.5196839376447235, "mrr_at_1000": 0.5203272944543572, "naucs_at_1_max": 0.004480643202613047, "naucs_at_1_std": 0.37306850777831274, "naucs_at_1_diff1": 0.8497152488431985, "naucs_at_3_max": -0.023586707954811218, "naucs_at_3_std": 0.43916842027225106, "naucs_at_3_diff1": 0.7622082313960983, "naucs_at_5_max": -0.01834430856067735, "naucs_at_5_std": 0.48723595313435425, "naucs_at_5_diff1": 0.7137695324430982, "naucs_at_10_max": -0.00746512197111837, "naucs_at_10_std": 0.525295069484105, "naucs_at_10_diff1": 0.7022449756601673, "naucs_at_20_max": -0.020896741742223748, "naucs_at_20_std": 0.6338344600006404, "naucs_at_20_diff1": 0.6637098324416104, "naucs_at_100_max": -0.07201954538247493, "naucs_at_100_std": 0.8347362958905693, "naucs_at_100_diff1": 0.6765426582296729, "naucs_at_1000_max": 1.0, "naucs_at_1000_std": 1.0, "naucs_at_1000_diff1": 1.0}, "arxivqa_subsampled": {"ndcg_at_1": 0.714, "ndcg_at_3": 0.76638, "ndcg_at_5": 0.78368, "ndcg_at_10": 0.79997, "ndcg_at_20": 0.80892, "ndcg_at_100": 0.82042, "ndcg_at_1000": 0.82197, "map_at_1": 0.714, "map_at_3": 0.754, "map_at_5": 0.7636, "map_at_10": 0.77039, "map_at_20": 0.77304, "map_at_100": 0.77477, "map_at_1000": 0.77483, "recall_at_1": 0.714, "recall_at_3": 0.802, "recall_at_5": 0.844, "recall_at_10": 0.894, "recall_at_20": 0.928, "recall_at_100": 0.988, "recall_at_1000": 1.0, "precision_at_1": 0.714, 
"precision_at_3": 0.26733, "precision_at_5": 0.1688, "precision_at_10": 0.0894, "precision_at_20": 0.0464, "precision_at_100": 0.00988, "precision_at_1000": 0.001, "mrr_at_1": 0.722, "mrr_at_3": 0.7573333333333332, "mrr_at_5": 0.7667333333333332, "mrr_at_10": 0.7737380952380952, "mrr_at_20": 0.7765198548183843, "mrr_at_100": 0.7779628670060682, "mrr_at_1000": 0.778022198523037, "naucs_at_1_max": -0.07167414639267208, "naucs_at_1_std": 0.04919747601426205, "naucs_at_1_diff1": 0.8805605857806468, "naucs_at_3_max": -0.06006917484294429, "naucs_at_3_std": 0.1344875918885953, "naucs_at_3_diff1": 0.8181185652774557, "naucs_at_5_max": -0.004483910673466629, "naucs_at_5_std": 0.04728830164033324, "naucs_at_5_diff1": 0.8102815946142837, "naucs_at_10_max": 0.03561223222050493, "naucs_at_10_std": 0.19361766324121799, "naucs_at_10_diff1": 0.7673180341834832, "naucs_at_20_max": 0.01588598402323649, "naucs_at_20_std": 0.18665058616038757, "naucs_at_20_diff1": 0.767740429505134, "naucs_at_100_max": 0.04590725178961408, "naucs_at_100_std": 0.6051976346094006, "naucs_at_100_diff1": 0.8883442265795202, "naucs_at_1000_max": 1.0, "naucs_at_1000_std": 1.0, "naucs_at_1000_diff1": 1.0}, "tabfquad_subsampled": {"ndcg_at_1": 0.75714, "ndcg_at_3": 0.83988, "ndcg_at_5": 0.85479, "ndcg_at_10": 0.86528, "ndcg_at_20": 0.86903, "ndcg_at_100": 0.87343, "ndcg_at_1000": 0.87343, "map_at_1": 0.75714, "map_at_3": 0.82024, "map_at_5": 0.82863, "map_at_10": 0.833, "map_at_20": 0.8341, "map_at_100": 0.83487, "map_at_1000": 0.83487, "recall_at_1": 0.75714, "recall_at_3": 0.89643, "recall_at_5": 0.93214, "recall_at_10": 0.96429, "recall_at_20": 0.97857, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.75714, "precision_at_3": 0.29881, "precision_at_5": 0.18643, "precision_at_10": 0.09643, "precision_at_20": 0.04893, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.7714285714285715, "mrr_at_3": 0.8273809523809526, "mrr_at_5": 0.8355952380952381, "mrr_at_10": 
0.8398993764172337, "mrr_at_20": 0.8410148680684395, "mrr_at_100": 0.8417679492154282, "mrr_at_1000": 0.8417679492154282, "naucs_at_1_max": 0.3556353117634457, "naucs_at_1_std": 0.21661973124368672, "naucs_at_1_diff1": 0.8554516514126538, "naucs_at_3_max": 0.37286134739157034, "naucs_at_3_std": 0.3338076910637462, "naucs_at_3_diff1": 0.7320094440311777, "naucs_at_5_max": 0.43618851049191654, "naucs_at_5_std": 0.42776057791537964, "naucs_at_5_diff1": 0.8366258784215467, "naucs_at_10_max": 0.44010270774976973, "naucs_at_10_std": 0.5859943977591013, "naucs_at_10_diff1": 0.8807189542483634, "naucs_at_20_max": 0.7323373793962044, "naucs_at_20_std": 0.4025832555244304, "naucs_at_20_diff1": 0.8665577342048016, "naucs_at_100_max": 1.0, "naucs_at_100_std": 1.0, "naucs_at_100_diff1": 1.0, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "tatdqa": {"ndcg_at_1": 0.52616, "ndcg_at_3": 0.63298, "ndcg_at_5": 0.66356, "ndcg_at_10": 0.6897, "ndcg_at_20": 0.7012, "ndcg_at_100": 0.71362, "ndcg_at_1000": 0.71733, "map_at_1": 0.52616, "map_at_3": 0.60653, "map_at_5": 0.62358, "map_at_10": 0.63465, "map_at_20": 0.63793, "map_at_100": 0.63975, "map_at_1000": 0.63993, "recall_at_1": 0.52616, "recall_at_3": 0.70956, "recall_at_5": 0.78352, "recall_at_10": 0.8629, "recall_at_20": 0.9074, "recall_at_100": 0.97294, "recall_at_1000": 1.0, "precision_at_1": 0.52616, "precision_at_3": 0.23652, "precision_at_5": 0.1567, "precision_at_10": 0.08629, "precision_at_20": 0.04537, "precision_at_100": 0.00973, "precision_at_1000": 0.001, "mrr_at_1": 0.5195429945880938, "mrr_at_3": 0.6040288634997003, "mrr_at_5": 0.620986169573062, "mrr_at_10": 0.6322717120522299, "mrr_at_20": 0.6353331416669995, "mrr_at_100": 0.6372441378072822, "mrr_at_1000": 0.6374337620314994, "naucs_at_1_max": 0.13750389195260027, "naucs_at_1_std": -0.0641600292234218, "naucs_at_1_diff1": 0.6972849803299114, "naucs_at_3_max": 0.1229709343382001, "naucs_at_3_std": -0.013317720735276251, 
"naucs_at_3_diff1": 0.5686269471942104, "naucs_at_5_max": 0.13493612409247746, "naucs_at_5_std": 0.009513465579120639, "naucs_at_5_diff1": 0.5267604620708811, "naucs_at_10_max": 0.16382278445190818, "naucs_at_10_std": 0.0886251771544149, "naucs_at_10_diff1": 0.49350500679148773, "naucs_at_20_max": 0.21650940734056756, "naucs_at_20_std": 0.2850014970530571, "naucs_at_20_diff1": 0.4755857918074485, "naucs_at_100_max": 0.12096932417914774, "naucs_at_100_std": 0.5576283374096134, "naucs_at_100_diff1": 0.4569584691604355, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "shift_project": {"ndcg_at_1": 0.62, "ndcg_at_3": 0.76226, "ndcg_at_5": 0.78678, "ndcg_at_10": 0.80013, "ndcg_at_20": 0.80276, "ndcg_at_100": 0.8079, "ndcg_at_1000": 0.8079, "map_at_1": 0.62, "map_at_3": 0.72833, "map_at_5": 0.74183, "map_at_10": 0.7476, "map_at_20": 0.74836, "map_at_100": 0.74891, "map_at_1000": 0.74891, "recall_at_1": 0.62, "recall_at_3": 0.86, "recall_at_5": 0.92, "recall_at_10": 0.96, "recall_at_20": 0.97, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.62, "precision_at_3": 0.28667, "precision_at_5": 0.184, "precision_at_10": 0.096, "precision_at_20": 0.0485, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.65, "mrr_at_3": 0.7449999999999999, "mrr_at_5": 0.7655, "mrr_at_10": 0.7665, "mrr_at_20": 0.7672692307692306, "mrr_at_100": 0.7678543583609371, "mrr_at_1000": 0.7678543583609371, "naucs_at_1_max": 0.011291265421653209, "naucs_at_1_std": -0.27558994554348853, "naucs_at_1_diff1": 0.46524936159739105, "naucs_at_3_max": -0.1449702092282115, "naucs_at_3_std": -0.24071636414022332, "naucs_at_3_diff1": 0.45697658306775685, "naucs_at_5_max": -0.2214635854341673, "naucs_at_5_std": -0.42973856209149836, "naucs_at_5_diff1": 0.36350373482726567, "naucs_at_10_max": -0.367063492063485, "naucs_at_10_std": -0.749766573295977, "naucs_at_10_diff1": 0.052054154995333855, "naucs_at_20_max": -0.7791783380018631, "naucs_at_20_std": 
-1.040616246498598, "naucs_at_20_diff1": 0.12651727357610051, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}}
|
special_tokens_map.json
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"additional_special_tokens": [
|
3 |
+
{
|
4 |
+
"content": "<image>",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false
|
9 |
+
}
|
10 |
+
],
|
11 |
+
"bos_token": {
|
12 |
+
"content": "<bos>",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false
|
17 |
+
},
|
18 |
+
"eos_token": {
|
19 |
+
"content": "<eos>",
|
20 |
+
"lstrip": false,
|
21 |
+
"normalized": false,
|
22 |
+
"rstrip": false,
|
23 |
+
"single_word": false
|
24 |
+
},
|
25 |
+
"pad_token": {
|
26 |
+
"content": "<pad>",
|
27 |
+
"lstrip": false,
|
28 |
+
"normalized": false,
|
29 |
+
"rstrip": false,
|
30 |
+
"single_word": false
|
31 |
+
},
|
32 |
+
"unk_token": {
|
33 |
+
"content": "<unk>",
|
34 |
+
"lstrip": false,
|
35 |
+
"normalized": false,
|
36 |
+
"rstrip": false,
|
37 |
+
"single_word": false
|
38 |
+
}
|
39 |
+
}
|
tokenizer.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1df2ab04780faccf51a881d7c5a7026cc6f979083af2eebf709d051b8d47134b
|
3 |
+
size 17763458
|
tokenizer_config.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
training_config.yml
ADDED
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
config:
|
2 |
+
(): colpali_engine.utils.train_colpali_engine_models.ColModelTrainingConfig
|
3 |
+
output_dir: !path ../../../models/without_tabfquad/train_colpali-3b-pt-448
|
4 |
+
processor:
|
5 |
+
() : colpali_engine.utils.wrapper.AutoProcessorWrapper
|
6 |
+
pretrained_model_name_or_path: "./models/paligemma-3b-pt-448"
|
7 |
+
max_length: 50
|
8 |
+
model:
|
9 |
+
(): colpali_engine.utils.wrapper.AutoColModelWrapper
|
10 |
+
pretrained_model_name_or_path: "./models/paligemma-3b-pt-448"
|
11 |
+
training_objective: "colbertv1"
|
12 |
+
# attn_implementation: "eager"
|
13 |
+
torch_dtype: !ext torch.bfloat16
|
14 |
+
# device_map: "auto"
|
15 |
+
# quantization_config:
|
16 |
+
# (): transformers.BitsAndBytesConfig
|
17 |
+
# load_in_4bit: true
|
18 |
+
# bnb_4bit_quant_type: "nf4"
|
19 |
+
# bnb_4bit_compute_dtype: "bfloat16"
|
20 |
+
# bnb_4bit_use_double_quant: true
|
21 |
+
|
22 |
+
dataset_loading_func: !ext colpali_engine.utils.dataset_transformation.load_train_set
|
23 |
+
eval_dataset_loader: !import ../data/test_data.yaml
|
24 |
+
|
25 |
+
max_length: 50
|
26 |
+
run_eval: true
|
27 |
+
add_suffix: true
|
28 |
+
loss_func:
|
29 |
+
(): colpali_engine.loss.colbert_loss.ColbertPairwiseCELoss
|
30 |
+
tr_args: !import ../tr_args/default_tr_args.yaml
|
31 |
+
peft_config:
|
32 |
+
(): peft.LoraConfig
|
33 |
+
r: 32
|
34 |
+
lora_alpha: 32
|
35 |
+
lora_dropout: 0.1
|
36 |
+
init_lora_weights: "gaussian"
|
37 |
+
bias: "none"
|
38 |
+
task_type: "FEATURE_EXTRACTION"
|
39 |
+
target_modules: '(.*(language_model).*(down_proj|gate_proj|up_proj|k_proj|q_proj|v_proj|o_proj).*$|.*(custom_text_proj).*$)'
|
40 |
+
# target_modules: '(.*(language_model).*(down_proj|gate_proj|up_proj|k_proj|q_proj|v_proj|o_proj).*$|.*(custom_text_proj).*$)'
|
41 |
+
|