Doron Adler
committed on
Commit
•
9795464
1
Parent(s):
bffdb0a
Further train for another 800K steps on CC-100, Twitter, Updated Wikipedia
Browse files
- .gitattributes +1 -0
- config.json +3 -1
- distilgpt2-base-pretrained-he.mlpackage/Data/com.apple.CoreML/FeatureDescriptions.json +3 -3
- distilgpt2-base-pretrained-he.mlpackage/Data/com.apple.CoreML/Metadata.json +1 -1
- distilgpt2-base-pretrained-he.mlpackage/Data/com.apple.CoreML/model.mlmodel +1 -1
- distilgpt2-base-pretrained-he.mlpackage/Manifest.json +9 -9
- flax_model.msgpack +1 -1
- model.onnx +1 -1
- pytorch_model.bin +2 -2
- tf_model.h5 +3 -0
- tokenizer.json +6 -3
- events.out.tfevents.1626785892.t1v-n-d9fb8529-w-0.668251.3.v2 → training_args.bin +2 -2
.gitattributes
CHANGED
@@ -16,3 +16,4 @@
|
|
16 |
*.pth filter=lfs diff=lfs merge=lfs -text
|
17 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
18 |
model.onnx filter=lfs diff=lfs merge=lfs -text
|
|
|
|
16 |
*.pth filter=lfs diff=lfs merge=lfs -text
|
17 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
18 |
model.onnx filter=lfs diff=lfs merge=lfs -text
|
19 |
+
tf_model.h5 filter=lfs diff=lfs merge=lfs -text
|
config.json
CHANGED
@@ -26,7 +26,9 @@
|
|
26 |
"n_layer": 6,
|
27 |
"n_positions": 1024,
|
28 |
"pad_token_id": 50257,
|
|
|
29 |
"resid_pdrop": 0.1,
|
|
|
30 |
"scale_attn_weights": true,
|
31 |
"summary_activation": null,
|
32 |
"summary_first_dropout": 0.1,
|
@@ -40,7 +42,7 @@
|
|
40 |
}
|
41 |
},
|
42 |
"torch_dtype": "float32",
|
43 |
-
"transformers_version": "4.
|
44 |
"use_cache": true,
|
45 |
"vocab_size": 50257
|
46 |
}
|
|
|
26 |
"n_layer": 6,
|
27 |
"n_positions": 1024,
|
28 |
"pad_token_id": 50257,
|
29 |
+
"reorder_and_upcast_attn": false,
|
30 |
"resid_pdrop": 0.1,
|
31 |
+
"scale_attn_by_inverse_layer_idx": false,
|
32 |
"scale_attn_weights": true,
|
33 |
"summary_activation": null,
|
34 |
"summary_first_dropout": 0.1,
|
|
|
42 |
}
|
43 |
},
|
44 |
"torch_dtype": "float32",
|
45 |
+
"transformers_version": "4.22.0.dev0",
|
46 |
"use_cache": true,
|
47 |
"vocab_size": 50257
|
48 |
}
|
distilgpt2-base-pretrained-he.mlpackage/Data/com.apple.CoreML/FeatureDescriptions.json
CHANGED
@@ -1,15 +1,15 @@
|
|
1 |
{
|
2 |
"Outputs" : {
|
3 |
"output_logits" : {
|
4 |
-
"MLFeatureShortDescription" : ""
|
5 |
}
|
6 |
},
|
7 |
"Inputs" : {
|
8 |
"position_ids" : {
|
9 |
-
"MLFeatureShortDescription" : ""
|
10 |
},
|
11 |
"input_ids" : {
|
12 |
-
"MLFeatureShortDescription" : ""
|
13 |
}
|
14 |
},
|
15 |
"TrainingInputs" : {
|
|
|
1 |
{
|
2 |
"Outputs" : {
|
3 |
"output_logits" : {
|
4 |
+
"MLFeatureShortDescription" : "--"
|
5 |
}
|
6 |
},
|
7 |
"Inputs" : {
|
8 |
"position_ids" : {
|
9 |
+
"MLFeatureShortDescription" : "--"
|
10 |
},
|
11 |
"input_ids" : {
|
12 |
+
"MLFeatureShortDescription" : "--"
|
13 |
}
|
14 |
},
|
15 |
"TrainingInputs" : {
|
distilgpt2-base-pretrained-he.mlpackage/Data/com.apple.CoreML/Metadata.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"MLModelVersionStringKey" : "1.
|
3 |
"MLModelDescriptionKey" : "hebrew-distilgpt2\n\nA tiny GPT2 based Hebrew text generation model trained on a TPUv3-8 via the TPU Research Cloud Program.",
|
4 |
"MLModelCreatorDefinedKey" : {
|
5 |
"model_card_url" : "https:\/\/huggingface.co\/Norod78\/distilgpt2-base-pretrained-he"
|
|
|
1 |
{
|
2 |
+
"MLModelVersionStringKey" : "1.01",
|
3 |
"MLModelDescriptionKey" : "hebrew-distilgpt2\n\nA tiny GPT2 based Hebrew text generation model trained on a TPUv3-8 via the TPU Research Cloud Program.",
|
4 |
"MLModelCreatorDefinedKey" : {
|
5 |
"model_card_url" : "https:\/\/huggingface.co\/Norod78\/distilgpt2-base-pretrained-he"
|
distilgpt2-base-pretrained-he.mlpackage/Data/com.apple.CoreML/model.mlmodel
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 482254328
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3873d1a72c554711785b15a1b27af3824f1269c2efdc1de65181f1031b6565fa
|
3 |
size 482254328
|
distilgpt2-base-pretrained-he.mlpackage/Manifest.json
CHANGED
@@ -1,24 +1,24 @@
|
|
1 |
{
|
2 |
"fileFormatVersion": "1.0.0",
|
3 |
"itemInfoEntries": {
|
4 |
-
"
|
5 |
"author": "com.apple.CoreML",
|
6 |
"description": "External FeatureDescription Overlay",
|
7 |
"name": "FeatureDescriptions.json",
|
8 |
"path": "com.apple.CoreML/FeatureDescriptions.json"
|
9 |
},
|
10 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
"author": "com.apple.CoreML",
|
12 |
"description": "External Metadata Overlay",
|
13 |
"name": "Metadata.json",
|
14 |
"path": "com.apple.CoreML/Metadata.json"
|
15 |
-
},
|
16 |
-
"FD401BB0-2CA8-4DB7-BAD6-23B171A68404": {
|
17 |
-
"author": "com.apple.CoreML",
|
18 |
-
"description": "CoreML Model Specification",
|
19 |
-
"name": "distilgpt2-base-pretrained-he-64-6.mlmodel",
|
20 |
-
"path": "com.apple.CoreML/model.mlmodel"
|
21 |
}
|
22 |
},
|
23 |
-
"rootModelIdentifier": "
|
24 |
}
|
|
|
1 |
{
|
2 |
"fileFormatVersion": "1.0.0",
|
3 |
"itemInfoEntries": {
|
4 |
+
"5CA8030A-3376-40B9-9F77-FE7151EBE0F7": {
|
5 |
"author": "com.apple.CoreML",
|
6 |
"description": "External FeatureDescription Overlay",
|
7 |
"name": "FeatureDescriptions.json",
|
8 |
"path": "com.apple.CoreML/FeatureDescriptions.json"
|
9 |
},
|
10 |
+
"5EFA4247-BC5C-47CE-9E64-F1747845A076": {
|
11 |
+
"author": "com.apple.CoreML",
|
12 |
+
"description": "CoreML Model Specification",
|
13 |
+
"name": "model.mlmodel",
|
14 |
+
"path": "com.apple.CoreML/model.mlmodel"
|
15 |
+
},
|
16 |
+
"63DB2BDF-B3FD-4CE4-9251-66F29CF34545": {
|
17 |
"author": "com.apple.CoreML",
|
18 |
"description": "External Metadata Overlay",
|
19 |
"name": "Metadata.json",
|
20 |
"path": "com.apple.CoreML/Metadata.json"
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
}
|
22 |
},
|
23 |
+
"rootModelIdentifier": "5EFA4247-BC5C-47CE-9E64-F1747845A076"
|
24 |
}
|
flax_model.msgpack
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 327652826
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fda64ecb202c9ee0b7b7575f8db1c84efd42967f979ff7d1b2f2b0d32fd8b801
|
3 |
size 327652826
|
model.onnx
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 488438673
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d5276bfcba7af74619c75c10e9115f4198e2e30352a7cf0e7155c5152f236360
|
3 |
size 488438673
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bef275decdf17187b39d8e6e0a4cd1213c9d8970b2dc737c9139152d9180b295
|
3 |
+
size 333969117
|
tf_model.h5
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:21b09d55184b71e392d4b6ef2d1e064b34668413849a99d1a86a7312f6cf151e
|
3 |
+
size 327744824
|
tokenizer.json
CHANGED
@@ -53,17 +53,20 @@
|
|
53 |
"pre_tokenizer": {
|
54 |
"type": "ByteLevel",
|
55 |
"add_prefix_space": false,
|
56 |
-
"trim_offsets": true
|
|
|
57 |
},
|
58 |
"post_processor": {
|
59 |
"type": "ByteLevel",
|
60 |
"add_prefix_space": true,
|
61 |
-
"trim_offsets": false
|
|
|
62 |
},
|
63 |
"decoder": {
|
64 |
"type": "ByteLevel",
|
65 |
"add_prefix_space": true,
|
66 |
-
"trim_offsets": true
|
|
|
67 |
},
|
68 |
"model": {
|
69 |
"type": "BPE",
|
|
|
53 |
"pre_tokenizer": {
|
54 |
"type": "ByteLevel",
|
55 |
"add_prefix_space": false,
|
56 |
+
"trim_offsets": true,
|
57 |
+
"use_regex": true
|
58 |
},
|
59 |
"post_processor": {
|
60 |
"type": "ByteLevel",
|
61 |
"add_prefix_space": true,
|
62 |
+
"trim_offsets": false,
|
63 |
+
"use_regex": true
|
64 |
},
|
65 |
"decoder": {
|
66 |
"type": "ByteLevel",
|
67 |
"add_prefix_space": true,
|
68 |
+
"trim_offsets": true,
|
69 |
+
"use_regex": true
|
70 |
},
|
71 |
"model": {
|
72 |
"type": "BPE",
|
events.out.tfevents.1626785892.t1v-n-d9fb8529-w-0.668251.3.v2 → training_args.bin
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d2e3602b258d62c2b8a7bd97787a1787754e6a8e44f598e6a5cd298cb5e3fe5a
|
3 |
+
size 3439
|