Saiful Haq committed on
Commit
13c618a
1 Parent(s): 5f776d0

Added language: guj_Gujr

Browse files
guj_Gujr-nllb1.3b-moses/colbert-50000/artifact.metadata ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "query_token_id": "[unused0]",
3
+ "doc_token_id": "[unused1]",
4
+ "query_token": "[Q]",
5
+ "doc_token": "[D]",
6
+ "ncells": null,
7
+ "centroid_score_threshold": null,
8
+ "ndocs": null,
9
+ "index_path": null,
10
+ "nbits": 1,
11
+ "kmeans_niters": 4,
12
+ "resume": false,
13
+ "similarity": "cosine",
14
+ "bsize": 128,
15
+ "accumsteps": 1,
16
+ "lr": 3e-6,
17
+ "maxsteps": 500000,
18
+ "save_every": null,
19
+ "warmup": null,
20
+ "warmup_bert": null,
21
+ "relu": false,
22
+ "nway": 2,
23
+ "use_ib_negatives": false,
24
+ "reranker": false,
25
+ "distillation_alpha": 1.0,
26
+ "ignore_scores": false,
27
+ "model_name": null,
28
+ "query_maxlen": 32,
29
+ "attend_to_mask_tokens": false,
30
+ "interaction": "colbert",
31
+ "dim": 128,
32
+ "doc_maxlen": 220,
33
+ "mask_punctuation": true,
34
+ "checkpoint": "bert-base-multilingual-uncased",
35
+ "triples": "data\/\/triples.train.tsv",
36
+ "collection": "..\/data_nllb_1.3B_quantized_8bit\/256\/guj_Gujr\/\/collection.tsv",
37
+ "queries": "..\/data_nllb_1.3B_quantized_8bit\/256\/guj_Gujr\/query.train.tsv",
38
+ "index_name": null,
39
+ "overwrite": false,
40
+ "root": "\/saif\/saif_ir\/indicIRsuite\/ColBERT\/experiments",
41
+ "experiment": "mmarco_moses_nllb_1.3B",
42
+ "index_root": null,
43
+ "name": "2023-10\/05\/02.45.26",
44
+ "rank": 0,
45
+ "nranks": 1,
46
+ "amp": true,
47
+ "gpus": 1,
48
+ "meta": {
49
+ "hostname": "ddbf18d969d5",
50
+ "git_branch": "main",
51
+ "git_hash": "43ae55077c5c793d382b7ee3ed6b57630bbcd22e",
52
+ "git_commit_datetime": "2023-04-19 17:21:22-07:00",
53
+ "current_datetime": "Oct 05, 2023 ; 12:03PM UTC (+0000)",
54
+ "cmd": "train_multilingual-final.py guj_Gujr nllb_1.3B",
55
+ "version": "colbert-v0.4"
56
+ }
57
+ }
guj_Gujr-nllb1.3b-moses/colbert-50000/config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "bert-base-multilingual-uncased",
3
+ "architectures": [
4
+ "HF_ColBERT"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "directionality": "bidi",
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 3072,
14
+ "layer_norm_eps": 1e-12,
15
+ "max_position_embeddings": 512,
16
+ "model_type": "bert",
17
+ "num_attention_heads": 12,
18
+ "num_hidden_layers": 12,
19
+ "pad_token_id": 0,
20
+ "pooler_fc_size": 768,
21
+ "pooler_num_attention_heads": 12,
22
+ "pooler_num_fc_layers": 3,
23
+ "pooler_size_per_head": 128,
24
+ "pooler_type": "first_token_transform",
25
+ "position_embedding_type": "absolute",
26
+ "torch_dtype": "float32",
27
+ "transformers_version": "4.28.1",
28
+ "type_vocab_size": 2,
29
+ "use_cache": true,
30
+ "vocab_size": 105879
31
+ }
guj_Gujr-nllb1.3b-moses/colbert-50000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f41f988bddf342acccecec26a93d25d5a7cff72a362c09105b0c01cf2f57a31d
3
+ size 669891377
guj_Gujr-nllb1.3b-moses/colbert-50000/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
guj_Gujr-nllb1.3b-moses/colbert-50000/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
guj_Gujr-nllb1.3b-moses/colbert-50000/tokenizer_config.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "clean_up_tokenization_spaces": true,
3
+ "cls_token": "[CLS]",
4
+ "do_lower_case": true,
5
+ "mask_token": "[MASK]",
6
+ "model_max_length": 512,
7
+ "pad_token": "[PAD]",
8
+ "sep_token": "[SEP]",
9
+ "strip_accents": null,
10
+ "tokenize_chinese_chars": true,
11
+ "tokenizer_class": "BertTokenizer",
12
+ "unk_token": "[UNK]"
13
+ }
guj_Gujr-nllb1.3b-moses/colbert-50000/vocab.txt ADDED
The diff for this file is too large to render. See raw diff