arver committed on
Commit
6770390
1 Parent(s): 95941a8

Upload 8 files

Browse files
.gitattributes CHANGED
@@ -1,34 +1,8 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
  *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
  *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
  *.tflite filter=lfs diff=lfs merge=lfs -text
29
- *.tgz filter=lfs diff=lfs merge=lfs -text
30
- *.wasm filter=lfs diff=lfs merge=lfs -text
31
- *.xz filter=lfs diff=lfs merge=lfs -text
32
- *.zip filter=lfs diff=lfs merge=lfs -text
33
- *.zst filter=lfs diff=lfs merge=lfs -text
34
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
1
+ *.bin.* filter=lfs diff=lfs merge=lfs -text
2
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
3
  *.bin filter=lfs diff=lfs merge=lfs -text
 
 
 
 
4
  *.h5 filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  *.tflite filter=lfs diff=lfs merge=lfs -text
6
+ *.tar.gz filter=lfs diff=lfs merge=lfs -text
7
+ *.ot filter=lfs diff=lfs merge=lfs -text
8
+ *.onnx filter=lfs diff=lfs merge=lfs -text
 
 
 
README.md ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ This model is [DistilBERT base uncased](https://huggingface.co/distilbert-base-uncased) fine-tuned on SQuAD v2 as follows:
2
+
3
+ ```
4
+ export SQUAD_DIR=../../squad2
5
+ python3 run_squad.py \
6
+ --model_type distilbert \
7
+ --model_name_or_path distilbert-base-uncased \
8
+ --do_train \
9
+ --do_eval \
10
+ --overwrite_cache \
11
+ --do_lower_case \
12
+ --version_2_with_negative \
13
+ --save_steps 100000 \
14
+ --train_file $SQUAD_DIR/train-v2.0.json \
15
+ --predict_file $SQUAD_DIR/dev-v2.0.json \
16
+ --per_gpu_train_batch_size 8 \
17
+ --num_train_epochs 3 \
18
+ --learning_rate 3e-5 \
19
+ --max_seq_length 384 \
20
+ --doc_stride 128 \
21
+ --output_dir ./tmp/distilbert_fine_tuned/
22
+ ```
23
+
24
+ Performance on a dev subset is close to the original paper:
25
+
26
+ ```
27
+ Results:
28
+ {
29
+ 'exact': 64.88976637051661,
30
+ 'f1': 68.1776176526635,
31
+ 'total': 6078,
32
+ 'HasAns_exact': 69.7594501718213,
33
+ 'HasAns_f1': 76.62665295288285,
34
+ 'HasAns_total': 2910,
35
+ 'NoAns_exact': 60.416666666666664,
36
+ 'NoAns_f1': 60.416666666666664,
37
+ 'NoAns_total': 3168,
38
+ 'best_exact': 64.88976637051661,
39
+ 'best_exact_thresh': 0.0,
40
+ 'best_f1': 68.17761765266337,
41
+ 'best_f1_thresh': 0.0
42
+ }
43
+ ```
44
+
45
+ We are hopeful this might save you time, energy, and compute. Cheers!
config.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation": "gelu",
3
+ "architectures": [
4
+ "DistilBertForQuestionAnswering"
5
+ ],
6
+ "attention_dropout": 0.1,
7
+ "dim": 768,
8
+ "dropout": 0.1,
9
+ "eos_token_ids": null,
10
+ "hidden_dim": 3072,
11
+ "initializer_range": 0.02,
12
+ "max_position_embeddings": 512,
13
+ "model_type": "distilbert",
14
+ "n_heads": 12,
15
+ "n_layers": 6,
16
+ "output_past": true,
17
+ "qa_dropout": 0.1,
18
+ "seq_classif_dropout": 0.2,
19
+ "sinusoidal_pos_embds": false,
20
+ "tie_weights_": true,
21
+ "vocab_size": 30522
22
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:658b79b0f3a536b633efefcdd424c836867e7c1213fdb7e0aabd426809f86694
3
+ size 265482418
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"do_lower_case": true, "max_len": 512}
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a03aa6fece54ff48734e95565bf1eb9285a1bd8a67c63065d83abaaccb44b2d
3
+ size 1452
vocab.txt ADDED
The diff for this file is too large to render. See raw diff