96abhishekarora
committed on
Commit
•
9328dfd
1
Parent(s):
990e5ec
Updated model with better training and evaluation. Test and val data included as pickle files. Older legacy files were removed to avoid confusion.
Browse files- .gitattributes +4 -38
- Information-Retrieval_evaluation_eval_results.csv +5 -0
- Information-Retrieval_evaluation_test_results.csv +5 -0
- README.md +1 -1
- config.json +1 -1
- eval/Information-Retrieval_evaluation_eval_results.csv +0 -0
- special_tokens_map.json +6 -42
- test_data.pickle +3 -0
- tokenizer.json +0 -0
- tokenizer_config.json +0 -7
- val_data.pickle +3 -0
.gitattributes
CHANGED
@@ -1,39 +1,5 @@
|
|
1 |
-
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
-
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
-
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
-
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
-
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
-
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
-
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
-
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
-
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
-
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
-
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
-
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
-
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
-
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
-
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
-
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
-
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
-
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
-
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
-
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
-
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
-
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
-
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
-
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
-
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
-
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
-
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
-
*.tar filter=lfs diff=lfs merge=lfs -text
|
29 |
-
*.tflite filter=lfs diff=lfs merge=lfs -text
|
30 |
-
*.tgz filter=lfs diff=lfs merge=lfs -text
|
31 |
-
*.wasm filter=lfs diff=lfs merge=lfs -text
|
32 |
-
*.xz filter=lfs diff=lfs merge=lfs -text
|
33 |
-
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
-
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
-
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
model.safetensors filter=lfs diff=lfs merge=lfs -text
|
37 |
-
.git/lfs/objects/
|
38 |
-
.
|
39 |
-
.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
model.safetensors filter=lfs diff=lfs merge=lfs -text
|
2 |
+
.git/lfs/objects/bd/7a/bd7a72c763bbb2e770f97994bf96540aa1e424f50d331e9e7ceba8e214f5c49e filter=lfs diff=lfs merge=lfs -text
|
3 |
+
test_data.pickle filter=lfs diff=lfs merge=lfs -text
|
4 |
+
val_data.pickle filter=lfs diff=lfs merge=lfs -text
|
5 |
+
sentencepiece.bpe.model filter=lfs diff=lfs merge=lfs -text
|
Information-Retrieval_evaluation_eval_results.csv
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
epoch,steps,cos_sim-Accuracy@1,cos_sim-Accuracy@3,cos_sim-Accuracy@5,cos_sim-Accuracy@10,cos_sim-Precision@1,cos_sim-Recall@1,cos_sim-Precision@3,cos_sim-Recall@3,cos_sim-Precision@5,cos_sim-Recall@5,cos_sim-Precision@10,cos_sim-Recall@10,cos_sim-MRR@10,cos_sim-NDCG@10,cos_sim-MAP@100
|
2 |
+
0,0,0.6184971098265896,0.8901734104046243,0.930635838150289,0.9710982658959537,0.6184971098265896,0.6184971098265896,0.29672447013487474,0.884393063583815,0.18612716763005774,0.9248554913294798,0.0982658959537572,0.9682080924855492,0.7596063859069642,0.8109979174972792,0.7595047697488971
|
3 |
+
0,0,0.6184971098265896,0.8901734104046243,0.930635838150289,0.9710982658959537,0.6184971098265896,0.6184971098265896,0.29672447013487474,0.884393063583815,0.18612716763005774,0.9248554913294798,0.0982658959537572,0.9682080924855492,0.7596063859069642,0.8109979174972792,0.7595047697488971
|
4 |
+
0,0,0.3956043956043956,0.5732600732600732,0.6556776556776557,0.7875457875457875,0.3956043956043956,0.3956043956043956,0.19108669108669107,0.5732600732600732,0.1311355311355311,0.6556776556776557,0.07875457875457875,0.7875457875457875,0.5070752660038369,0.5733445499232982,0.5169844475014571
|
5 |
+
0,0,0.3956043956043956,0.5732600732600732,0.6556776556776557,0.7875457875457875,0.3956043956043956,0.3956043956043956,0.19108669108669107,0.5732600732600732,0.1311355311355311,0.6556776556776557,0.07875457875457875,0.7875457875457875,0.5070752660038369,0.5733445499232982,0.5169844475014571
|
Information-Retrieval_evaluation_test_results.csv
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
epoch,steps,cos_sim-Accuracy@1,cos_sim-Accuracy@3,cos_sim-Accuracy@5,cos_sim-Accuracy@10,cos_sim-Precision@1,cos_sim-Recall@1,cos_sim-Precision@3,cos_sim-Recall@3,cos_sim-Precision@5,cos_sim-Recall@5,cos_sim-Precision@10,cos_sim-Recall@10,cos_sim-MRR@10,cos_sim-NDCG@10,cos_sim-MAP@100
|
2 |
+
0,0,0.8766233766233766,0.9805194805194806,0.987012987012987,1.0,0.8766233766233766,0.8625541125541126,0.3354978354978355,0.9783549783549783,0.20259740259740258,0.9848484848484848,0.10324675324675324,1.0,0.9299242424242423,0.947124363967297,0.9286873840445271
|
3 |
+
0,0,0.8896103896103896,1.0,1.0,1.0,0.8896103896103896,0.8755411255411256,0.341991341991342,0.9978354978354977,0.20519480519480518,0.9978354978354977,0.10324675324675324,1.0,0.9415584415584416,0.9562697249477613,0.9404761904761906
|
4 |
+
0,0,0.7739463601532567,0.9233716475095786,0.9655172413793104,0.9808429118773946,0.7739463601532567,0.7739463601532567,0.30779054916985943,0.9233716475095786,0.19310344827586207,0.9655172413793104,0.09808429118773947,0.9808429118773946,0.85460834397616,0.8859652217228802,0.8558258273397413
|
5 |
+
0,0,0.7739463601532567,0.9272030651340997,0.9578544061302682,0.9808429118773946,0.7739463601532567,0.7739463601532567,0.30906768837803317,0.9272030651340997,0.19157088122605365,0.9578544061302682,0.09808429118773947,0.9808429118773946,0.8563370431186524,0.8873142219416177,0.8576794640013031
|
README.md
CHANGED
@@ -10,7 +10,7 @@ tags:
|
|
10 |
|
11 |
---
|
12 |
|
13 |
-
#
|
14 |
|
15 |
This is a [LinkTransformer](https://linktransformer.github.io/) model. At its core, this model is a [sentence-transformers](https://www.SBERT.net) model - it just wraps around the class.
|
16 |
It is designed for quick and easy record linkage (entity-matching) through the LinkTransformer package. The tasks include clustering, deduplication, linking, aggregation and more.
|
|
|
10 |
|
11 |
---
|
12 |
|
13 |
+
# {MODEL_NAME}
|
14 |
|
15 |
This is a [LinkTransformer](https://linktransformer.github.io/) model. At its core, this model is a [sentence-transformers](https://www.SBERT.net) model - it just wraps around the class.
|
16 |
It is designed for quick and easy record linkage (entity-matching) through the LinkTransformer package. The tasks include clustering, deduplication, linking, aggregation and more.
|
config.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"_name_or_path": "
|
3 |
"architectures": [
|
4 |
"CamembertModel"
|
5 |
],
|
|
|
1 |
{
|
2 |
+
"_name_or_path": "dangvantuan/sentence-camembert-large",
|
3 |
"architectures": [
|
4 |
"CamembertModel"
|
5 |
],
|
eval/Information-Retrieval_evaluation_eval_results.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
special_tokens_map.json
CHANGED
@@ -3,27 +3,9 @@
|
|
3 |
"<s>NOTUSED",
|
4 |
"</s>NOTUSED"
|
5 |
],
|
6 |
-
"bos_token":
|
7 |
-
|
8 |
-
|
9 |
-
"normalized": false,
|
10 |
-
"rstrip": false,
|
11 |
-
"single_word": false
|
12 |
-
},
|
13 |
-
"cls_token": {
|
14 |
-
"content": "<s>",
|
15 |
-
"lstrip": false,
|
16 |
-
"normalized": false,
|
17 |
-
"rstrip": false,
|
18 |
-
"single_word": false
|
19 |
-
},
|
20 |
-
"eos_token": {
|
21 |
-
"content": "</s>",
|
22 |
-
"lstrip": false,
|
23 |
-
"normalized": false,
|
24 |
-
"rstrip": false,
|
25 |
-
"single_word": false
|
26 |
-
},
|
27 |
"mask_token": {
|
28 |
"content": "<mask>",
|
29 |
"lstrip": true,
|
@@ -31,25 +13,7 @@
|
|
31 |
"rstrip": false,
|
32 |
"single_word": false
|
33 |
},
|
34 |
-
"pad_token":
|
35 |
-
|
36 |
-
|
37 |
-
"normalized": false,
|
38 |
-
"rstrip": false,
|
39 |
-
"single_word": false
|
40 |
-
},
|
41 |
-
"sep_token": {
|
42 |
-
"content": "</s>",
|
43 |
-
"lstrip": false,
|
44 |
-
"normalized": false,
|
45 |
-
"rstrip": false,
|
46 |
-
"single_word": false
|
47 |
-
},
|
48 |
-
"unk_token": {
|
49 |
-
"content": "<unk>",
|
50 |
-
"lstrip": false,
|
51 |
-
"normalized": false,
|
52 |
-
"rstrip": false,
|
53 |
-
"single_word": false
|
54 |
-
}
|
55 |
}
|
|
|
3 |
"<s>NOTUSED",
|
4 |
"</s>NOTUSED"
|
5 |
],
|
6 |
+
"bos_token": "<s>",
|
7 |
+
"cls_token": "<s>",
|
8 |
+
"eos_token": "</s>",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
"mask_token": {
|
10 |
"content": "<mask>",
|
11 |
"lstrip": true,
|
|
|
13 |
"rstrip": false,
|
14 |
"single_word": false
|
15 |
},
|
16 |
+
"pad_token": "<pad>",
|
17 |
+
"sep_token": "</s>",
|
18 |
+
"unk_token": "<unk>"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
}
|
test_data.pickle
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:43f227e16bf6c140dacf290f7318dca890a0dd320b11ba3149802313a47f23db
|
3 |
+
size 32258
|
tokenizer.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
CHANGED
@@ -66,17 +66,10 @@
|
|
66 |
"cls_token": "<s>",
|
67 |
"eos_token": "</s>",
|
68 |
"mask_token": "<mask>",
|
69 |
-
"max_length": 514,
|
70 |
"model_max_length": 1000000000000000019884624838656,
|
71 |
-
"pad_to_multiple_of": null,
|
72 |
"pad_token": "<pad>",
|
73 |
-
"pad_token_type_id": 0,
|
74 |
-
"padding_side": "right",
|
75 |
"sep_token": "</s>",
|
76 |
"sp_model_kwargs": {},
|
77 |
-
"stride": 0,
|
78 |
"tokenizer_class": "CamembertTokenizer",
|
79 |
-
"truncation_side": "right",
|
80 |
-
"truncation_strategy": "longest_first",
|
81 |
"unk_token": "<unk>"
|
82 |
}
|
|
|
66 |
"cls_token": "<s>",
|
67 |
"eos_token": "</s>",
|
68 |
"mask_token": "<mask>",
|
|
|
69 |
"model_max_length": 1000000000000000019884624838656,
|
|
|
70 |
"pad_token": "<pad>",
|
|
|
|
|
71 |
"sep_token": "</s>",
|
72 |
"sp_model_kwargs": {},
|
|
|
73 |
"tokenizer_class": "CamembertTokenizer",
|
|
|
|
|
74 |
"unk_token": "<unk>"
|
75 |
}
|
val_data.pickle
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:19c473d4402745303e49184b01708b83a2d0f6cd54e36eabefbcf150ee4731ba
|
3 |
+
size 187515
|