96abhishekarora committed on
Commit
9328dfd
1 Parent(s): 990e5ec

Updated the model with better training and evaluation. Test and validation data are included as pickle files. Legacy files were removed to avoid confusion.

Browse files
.gitattributes CHANGED
@@ -1,39 +1,5 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  model.safetensors filter=lfs diff=lfs merge=lfs -text
37
- .git/lfs/objects/1a/76/1a76a58ef6d75109a94ea9fef24bd95c3e37b89337a9263dfd2e78be67d40fad filter=lfs diff=lfs merge=lfs -text
38
- .git/lfs/objects/d1/50/d150d5f1dc925180772426d6240cefc69cc62920fad2a06d088e0ce6ec787d61 filter=lfs diff=lfs merge=lfs -text
39
- .git/lfs/objects/d4/17/d417a0fe87c0361764cffc49b15f5bbe663a758233c22ab6d6a51a5ce008adfd filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  model.safetensors filter=lfs diff=lfs merge=lfs -text
2
+ .git/lfs/objects/bd/7a/bd7a72c763bbb2e770f97994bf96540aa1e424f50d331e9e7ceba8e214f5c49e filter=lfs diff=lfs merge=lfs -text
3
+ test_data.pickle filter=lfs diff=lfs merge=lfs -text
4
+ val_data.pickle filter=lfs diff=lfs merge=lfs -text
5
+ sentencepiece.bpe.model filter=lfs diff=lfs merge=lfs -text
Information-Retrieval_evaluation_eval_results.csv ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ epoch,steps,cos_sim-Accuracy@1,cos_sim-Accuracy@3,cos_sim-Accuracy@5,cos_sim-Accuracy@10,cos_sim-Precision@1,cos_sim-Recall@1,cos_sim-Precision@3,cos_sim-Recall@3,cos_sim-Precision@5,cos_sim-Recall@5,cos_sim-Precision@10,cos_sim-Recall@10,cos_sim-MRR@10,cos_sim-NDCG@10,cos_sim-MAP@100
2
+ 0,0,0.6184971098265896,0.8901734104046243,0.930635838150289,0.9710982658959537,0.6184971098265896,0.6184971098265896,0.29672447013487474,0.884393063583815,0.18612716763005774,0.9248554913294798,0.0982658959537572,0.9682080924855492,0.7596063859069642,0.8109979174972792,0.7595047697488971
3
+ 0,0,0.6184971098265896,0.8901734104046243,0.930635838150289,0.9710982658959537,0.6184971098265896,0.6184971098265896,0.29672447013487474,0.884393063583815,0.18612716763005774,0.9248554913294798,0.0982658959537572,0.9682080924855492,0.7596063859069642,0.8109979174972792,0.7595047697488971
4
+ 0,0,0.3956043956043956,0.5732600732600732,0.6556776556776557,0.7875457875457875,0.3956043956043956,0.3956043956043956,0.19108669108669107,0.5732600732600732,0.1311355311355311,0.6556776556776557,0.07875457875457875,0.7875457875457875,0.5070752660038369,0.5733445499232982,0.5169844475014571
5
+ 0,0,0.3956043956043956,0.5732600732600732,0.6556776556776557,0.7875457875457875,0.3956043956043956,0.3956043956043956,0.19108669108669107,0.5732600732600732,0.1311355311355311,0.6556776556776557,0.07875457875457875,0.7875457875457875,0.5070752660038369,0.5733445499232982,0.5169844475014571
Information-Retrieval_evaluation_test_results.csv ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ epoch,steps,cos_sim-Accuracy@1,cos_sim-Accuracy@3,cos_sim-Accuracy@5,cos_sim-Accuracy@10,cos_sim-Precision@1,cos_sim-Recall@1,cos_sim-Precision@3,cos_sim-Recall@3,cos_sim-Precision@5,cos_sim-Recall@5,cos_sim-Precision@10,cos_sim-Recall@10,cos_sim-MRR@10,cos_sim-NDCG@10,cos_sim-MAP@100
2
+ 0,0,0.8766233766233766,0.9805194805194806,0.987012987012987,1.0,0.8766233766233766,0.8625541125541126,0.3354978354978355,0.9783549783549783,0.20259740259740258,0.9848484848484848,0.10324675324675324,1.0,0.9299242424242423,0.947124363967297,0.9286873840445271
3
+ 0,0,0.8896103896103896,1.0,1.0,1.0,0.8896103896103896,0.8755411255411256,0.341991341991342,0.9978354978354977,0.20519480519480518,0.9978354978354977,0.10324675324675324,1.0,0.9415584415584416,0.9562697249477613,0.9404761904761906
4
+ 0,0,0.7739463601532567,0.9233716475095786,0.9655172413793104,0.9808429118773946,0.7739463601532567,0.7739463601532567,0.30779054916985943,0.9233716475095786,0.19310344827586207,0.9655172413793104,0.09808429118773947,0.9808429118773946,0.85460834397616,0.8859652217228802,0.8558258273397413
5
+ 0,0,0.7739463601532567,0.9272030651340997,0.9578544061302682,0.9808429118773946,0.7739463601532567,0.7739463601532567,0.30906768837803317,0.9272030651340997,0.19157088122605365,0.9578544061302682,0.09808429118773947,0.9808429118773946,0.8563370431186524,0.8873142219416177,0.8576794640013031
README.md CHANGED
@@ -10,7 +10,7 @@ tags:
10
 
11
  ---
12
 
13
- # dell-research-harvard/lt-un-data-fine-coarse-fr
14
 
15
  This is a [LinkTransformer](https://linktransformer.github.io/) model. At its core, this model is a [sentence-transformers](https://www.SBERT.net) model — it just wraps around the class.
16
  It is designed for quick and easy record linkage (entity-matching) through the LinkTransformer package. The tasks include clustering, deduplication, linking, aggregation and more.
 
10
 
11
  ---
12
 
13
+ # dell-research-harvard/lt-un-data-fine-coarse-fr
14
 
15
  This is a [LinkTransformer](https://linktransformer.github.io/) model. At its core, this model is a [sentence-transformers](https://www.SBERT.net) model — it just wraps around the class.
16
  It is designed for quick and easy record linkage (entity-matching) through the LinkTransformer package. The tasks include clustering, deduplication, linking, aggregation and more.
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "models/linkage_un_data_fr_fine_coarse",
3
  "architectures": [
4
  "CamembertModel"
5
  ],
 
1
  {
2
+ "_name_or_path": "dangvantuan/sentence-camembert-large",
3
  "architectures": [
4
  "CamembertModel"
5
  ],
eval/Information-Retrieval_evaluation_eval_results.csv ADDED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json CHANGED
@@ -3,27 +3,9 @@
3
  "<s>NOTUSED",
4
  "</s>NOTUSED"
5
  ],
6
- "bos_token": {
7
- "content": "<s>",
8
- "lstrip": false,
9
- "normalized": false,
10
- "rstrip": false,
11
- "single_word": false
12
- },
13
- "cls_token": {
14
- "content": "<s>",
15
- "lstrip": false,
16
- "normalized": false,
17
- "rstrip": false,
18
- "single_word": false
19
- },
20
- "eos_token": {
21
- "content": "</s>",
22
- "lstrip": false,
23
- "normalized": false,
24
- "rstrip": false,
25
- "single_word": false
26
- },
27
  "mask_token": {
28
  "content": "<mask>",
29
  "lstrip": true,
@@ -31,25 +13,7 @@
31
  "rstrip": false,
32
  "single_word": false
33
  },
34
- "pad_token": {
35
- "content": "<pad>",
36
- "lstrip": false,
37
- "normalized": false,
38
- "rstrip": false,
39
- "single_word": false
40
- },
41
- "sep_token": {
42
- "content": "</s>",
43
- "lstrip": false,
44
- "normalized": false,
45
- "rstrip": false,
46
- "single_word": false
47
- },
48
- "unk_token": {
49
- "content": "<unk>",
50
- "lstrip": false,
51
- "normalized": false,
52
- "rstrip": false,
53
- "single_word": false
54
- }
55
  }
 
3
  "<s>NOTUSED",
4
  "</s>NOTUSED"
5
  ],
6
+ "bos_token": "<s>",
7
+ "cls_token": "<s>",
8
+ "eos_token": "</s>",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  "mask_token": {
10
  "content": "<mask>",
11
  "lstrip": true,
 
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
+ "pad_token": "<pad>",
17
+ "sep_token": "</s>",
18
+ "unk_token": "<unk>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  }
test_data.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43f227e16bf6c140dacf290f7318dca890a0dd320b11ba3149802313a47f23db
3
+ size 32258
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -66,17 +66,10 @@
66
  "cls_token": "<s>",
67
  "eos_token": "</s>",
68
  "mask_token": "<mask>",
69
- "max_length": 514,
70
  "model_max_length": 1000000000000000019884624838656,
71
- "pad_to_multiple_of": null,
72
  "pad_token": "<pad>",
73
- "pad_token_type_id": 0,
74
- "padding_side": "right",
75
  "sep_token": "</s>",
76
  "sp_model_kwargs": {},
77
- "stride": 0,
78
  "tokenizer_class": "CamembertTokenizer",
79
- "truncation_side": "right",
80
- "truncation_strategy": "longest_first",
81
  "unk_token": "<unk>"
82
  }
 
66
  "cls_token": "<s>",
67
  "eos_token": "</s>",
68
  "mask_token": "<mask>",
 
69
  "model_max_length": 1000000000000000019884624838656,
 
70
  "pad_token": "<pad>",
 
 
71
  "sep_token": "</s>",
72
  "sp_model_kwargs": {},
 
73
  "tokenizer_class": "CamembertTokenizer",
 
 
74
  "unk_token": "<unk>"
75
  }
val_data.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19c473d4402745303e49184b01708b83a2d0f6cd54e36eabefbcf150ee4731ba
3
+ size 187515