samanehs committed
Commit 7feb9b7
Parent: eda7454

Upload folder using huggingface_hub
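
The commit message above is the default one emitted by huggingface_hub when pushing a folder. A minimal sketch of that publishing step, assuming a keras_nlp preset directory saved locally; the repo id and path below are hypothetical placeholders, not taken from this commit:

    from huggingface_hub import upload_folder

    # Push a locally saved keras_nlp preset directory to the Hub.
    # Both values are hypothetical placeholders.
    upload_folder(
        repo_id="samanehs/bert-preset",
        folder_path="./bert_preset",
    )
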
assets/tokenizer/vocabulary.txt ADDED
The diff for this file is too large to render.
 
config.json CHANGED
@@ -1,16 +1,17 @@
 {
-    "module": "keras_nlp.src.models.gpt2.gpt2_backbone",
-    "class_name": "GPT2Backbone",
+    "module": "keras_nlp.src.models.bert.bert_backbone",
+    "class_name": "BertBackbone",
     "config": {
-        "name": "gpt2_backbone",
+        "name": "bert_backbone",
         "trainable": true,
-        "vocabulary_size": 50257,
-        "num_layers": 12,
-        "num_heads": 12,
-        "hidden_dim": 768,
-        "intermediate_dim": 3072,
+        "vocabulary_size": 30522,
+        "num_layers": 2,
+        "num_heads": 2,
+        "hidden_dim": 128,
+        "intermediate_dim": 512,
         "dropout": 0.1,
-        "max_sequence_length": 1024
+        "max_sequence_length": 512,
+        "num_segments": 2
     },
-    "registered_name": "keras_nlp>GPT2Backbone"
+    "registered_name": "keras_nlp>BertBackbone"
 }
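
The new config.json swaps the 12-layer GPT-2 backbone for a tiny 2-layer BERT one. A sketch of what deserializing it amounts to, assuming keras_nlp 0.10 as recorded in metadata.json:

    import keras_nlp

    # Re-create the backbone that the new config.json describes.
    backbone = keras_nlp.models.BertBackbone(
        vocabulary_size=30522,   # standard BERT uncased WordPiece vocab size
        num_layers=2,
        num_heads=2,
        hidden_dim=128,
        intermediate_dim=512,
        dropout=0.1,
        max_sequence_length=512,
        num_segments=2,          # BERT-specific field; GPT-2 has no segment ids
    )
    backbone.summary()           # ~4.4M parameters, matching metadata.json
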
metadata.json CHANGED
@@ -1,6 +1,6 @@
 {
     "keras_version": "3.3.3",
     "keras_nlp_version": "0.10.0",
-    "parameter_count": 124439808,
-    "date_saved": "2024-05-02@01:13:08"
+    "parameter_count": 4385920,
+    "date_saved": "2024-05-02@01:15:04"
 }
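
The new parameter_count is consistent with the backbone config above. A back-of-envelope check, assuming the usual BERT layout (token/position/segment embeddings plus a LayerNorm, standard transformer layers, and a pooled dense head over [CLS]):

    vocab, pos, seg, hid, ffn, layers = 30522, 512, 2, 128, 512, 2

    embeddings = (vocab + pos + seg) * hid + 2 * hid  # 3 embeddings + LayerNorm
    per_layer = (
        4 * (hid * hid + hid)    # Q, K, V and attention output projections
        + 2 * hid                # attention LayerNorm
        + (hid * ffn + ffn)      # feed-forward up-projection
        + (ffn * hid + hid)      # feed-forward down-projection
        + 2 * hid                # feed-forward LayerNorm
    )
    pooler = hid * hid + hid     # dense head over the [CLS] position

    print(embeddings + layers * per_layer + pooler)  # 4385920
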
model.weights.h5 CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:73faf6047a426161b11cb2a39ff951e04e42e6fc756038fef31b32e3a631b699
-size 498160592
+oid sha256:6cd686cb39d95b258d02990e71512600d26c06e902f3b2f7d9ec1a6e4f559efc
+size 17632104
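
The .h5 weight files are stored through Git LFS, so the diff shows only the pointer (a sha256 oid plus the byte size): the backbone weights shrink from roughly 498 MB of GPT-2 weights to about 17.6 MB. A sketch of fetching the real file with huggingface_hub; the repo id is a hypothetical placeholder:

    from huggingface_hub import hf_hub_download

    # Resolves the LFS pointer and downloads the actual weights file.
    path = hf_hub_download(
        repo_id="samanehs/bert-preset",  # hypothetical placeholder
        filename="model.weights.h5",
    )
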
preprocessor.json CHANGED
@@ -1,25 +1,29 @@
 {
-    "module": "keras_nlp.src.models.gpt2.gpt2_causal_lm_preprocessor",
-    "class_name": "GPT2CausalLMPreprocessor",
+    "module": "keras_nlp.src.models.bert.bert_preprocessor",
+    "class_name": "BertPreprocessor",
     "config": {
-        "name": "gpt2_causal_lm_preprocessor",
+        "name": "bert_preprocessor",
         "trainable": true,
         "dtype": "float32",
         "tokenizer": {
-            "module": "keras_nlp.src.models.gpt2.gpt2_tokenizer",
-            "class_name": "GPT2Tokenizer",
+            "module": "keras_nlp.src.models.bert.bert_tokenizer",
+            "class_name": "BertTokenizer",
             "config": {
-                "name": "gpt2_tokenizer",
+                "name": "bert_tokenizer",
                 "trainable": true,
                 "dtype": "int32",
+                "vocabulary": null,
                 "sequence_length": null,
-                "add_prefix_space": false
+                "lowercase": true,
+                "strip_accents": false,
+                "split": true,
+                "suffix_indicator": "##",
+                "oov_token": "[UNK]"
             },
-            "registered_name": "keras_nlp>GPT2Tokenizer"
+            "registered_name": "keras_nlp>BertTokenizer"
         },
-        "sequence_length": 1024,
-        "add_start_token": true,
-        "add_end_token": true
+        "sequence_length": 512,
+        "truncate": "round_robin"
     },
-    "registered_name": "keras_nlp>GPT2CausalLMPreprocessor"
+    "registered_name": "keras_nlp>BertPreprocessor"
 }
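
The preprocessor now wraps a WordPiece BertTokenizer instead of GPT-2's byte-pair encoder, and packs inputs with [CLS]/[SEP] markers instead of start/end tokens; "round_robin" truncation trims paired segments alternately until they fit. A sketch of the equivalent objects, assuming the vocabulary file shipped under assets/tokenizer/ in this commit:

    import keras_nlp

    tokenizer = keras_nlp.models.BertTokenizer(
        vocabulary="assets/tokenizer/vocabulary.txt",  # added in this commit
        lowercase=True,
    )
    preprocessor = keras_nlp.models.BertPreprocessor(
        tokenizer=tokenizer,
        sequence_length=512,
        truncate="round_robin",
    )
    features = preprocessor(["The quick brown fox."])
    # features holds "token_ids", "segment_ids", and "padding_mask",
    # each shaped (batch, 512).
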
task.json CHANGED
@@ -1,49 +1,57 @@
 {
-    "module": "keras_nlp.src.models.gpt2.gpt2_causal_lm",
-    "class_name": "GPT2CausalLM",
+    "module": "keras_nlp.src.models.bert.bert_classifier",
+    "class_name": "BertClassifier",
     "config": {
         "backbone": {
-            "module": "keras_nlp.src.models.gpt2.gpt2_backbone",
-            "class_name": "GPT2Backbone",
+            "module": "keras_nlp.src.models.bert.bert_backbone",
+            "class_name": "BertBackbone",
             "config": {
-                "name": "gpt2_backbone",
+                "name": "bert_backbone",
                 "trainable": true,
-                "vocabulary_size": 50257,
-                "num_layers": 12,
-                "num_heads": 12,
-                "hidden_dim": 768,
-                "intermediate_dim": 3072,
+                "vocabulary_size": 30522,
+                "num_layers": 2,
+                "num_heads": 2,
+                "hidden_dim": 128,
+                "intermediate_dim": 512,
                 "dropout": 0.1,
-                "max_sequence_length": 1024
+                "max_sequence_length": 512,
+                "num_segments": 2
             },
-            "registered_name": "keras_nlp>GPT2Backbone"
+            "registered_name": "keras_nlp>BertBackbone"
         },
         "preprocessor": {
-            "module": "keras_nlp.src.models.gpt2.gpt2_causal_lm_preprocessor",
-            "class_name": "GPT2CausalLMPreprocessor",
+            "module": "keras_nlp.src.models.bert.bert_preprocessor",
+            "class_name": "BertPreprocessor",
             "config": {
-                "name": "gpt2_causal_lm_preprocessor",
+                "name": "bert_preprocessor",
                 "trainable": true,
                 "dtype": "float32",
                 "tokenizer": {
-                    "module": "keras_nlp.src.models.gpt2.gpt2_tokenizer",
-                    "class_name": "GPT2Tokenizer",
+                    "module": "keras_nlp.src.models.bert.bert_tokenizer",
+                    "class_name": "BertTokenizer",
                     "config": {
-                        "name": "gpt2_tokenizer",
+                        "name": "bert_tokenizer",
                         "trainable": true,
                         "dtype": "int32",
+                        "vocabulary": null,
                         "sequence_length": null,
-                        "add_prefix_space": false
+                        "lowercase": true,
+                        "strip_accents": false,
+                        "split": true,
+                        "suffix_indicator": "##",
+                        "oov_token": "[UNK]"
                     },
-                    "registered_name": "keras_nlp>GPT2Tokenizer"
+                    "registered_name": "keras_nlp>BertTokenizer"
                 },
-                "sequence_length": 1024,
-                "add_start_token": true,
-                "add_end_token": true
+                "sequence_length": 512,
+                "truncate": "round_robin"
             },
-            "registered_name": "keras_nlp>GPT2CausalLMPreprocessor"
+            "registered_name": "keras_nlp>BertPreprocessor"
         },
-        "name": "gpt2_causal_lm"
+        "name": "bert_classifier",
+        "num_classes": 2,
+        "activation": "linear",
+        "dropout": 0.1
     },
-    "registered_name": "keras_nlp>GPT2CausalLM"
+    "registered_name": "keras_nlp>BertClassifier"
 }
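
task.json turns the repo from a generative GPT2CausalLM into a two-class BertClassifier; activation "linear" means the head returns raw logits rather than softmax probabilities. A sketch of loading the task back from these files, assuming a local copy of the preset directory (the path is a hypothetical placeholder):

    import keras_nlp

    classifier = keras_nlp.models.BertClassifier.from_preset(
        "./bert_preset",   # hypothetical local path containing these files
        num_classes=2,
    )
    logits = classifier.predict(["What an amazing movie!"])  # shape (1, 2)
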
task.weights.h5 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bb6e04f49859cc11d6fe47a002594165a9082f5e13182fe60a0b83575c10b439
+size 52766840
tokenizer.json CHANGED
@@ -1,12 +1,17 @@
 {
-    "module": "keras_nlp.src.models.gpt2.gpt2_tokenizer",
-    "class_name": "GPT2Tokenizer",
+    "module": "keras_nlp.src.models.bert.bert_tokenizer",
+    "class_name": "BertTokenizer",
     "config": {
-        "name": "gpt2_tokenizer",
+        "name": "bert_tokenizer",
         "trainable": true,
         "dtype": "int32",
+        "vocabulary": null,
         "sequence_length": null,
-        "add_prefix_space": false
+        "lowercase": true,
+        "strip_accents": false,
+        "split": true,
+        "suffix_indicator": "##",
+        "oov_token": "[UNK]"
     },
-    "registered_name": "keras_nlp>GPT2Tokenizer"
+    "registered_name": "keras_nlp>BertTokenizer"
 }
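
The new tokenizer.json describes a WordPiece tokenizer: continuation pieces carry the "##" suffix indicator and anything outside the vocabulary maps to "[UNK]". The vocabulary itself is stored as the assets/tokenizer/vocabulary.txt file added by this commit rather than inline, which is why the serialized config records "vocabulary": null. A small round-trip sketch under that assumption:

    import keras_nlp

    tokenizer = keras_nlp.models.BertTokenizer(
        vocabulary="assets/tokenizer/vocabulary.txt",
        lowercase=True,
    )
    ids = tokenizer("The quickest brown fox.")  # WordPiece sub-word ids
    print(tokenizer.detokenize(ids))            # lowercased approximation of the input
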