First model version
- README.md +6 -0
- config.json +30 -0
- pytorch_model.bin +3 -0
- special_tokens_map.json +1 -0
- tokenizer.json +1 -0
- tokenizer_config.json +1 -0
README.md ADDED
@@ -0,0 +1,6 @@
+ELECTRA-minuscule-discriminator
+===============================
+
+A ridiculously small ELECTRA discriminator model for testing purposes.
+
+**THIS MODEL HAS NOT BEEN TRAINED, DO NOT EXPECT ANYTHING OF IT.**
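Since the README only warns that the checkpoint is untrained, a minimal loading sketch may be useful. The local directory name below is an assumption (a clone of this repo), not a published model id.

```python
# Minimal sketch, assuming the repo has been cloned locally into this (hypothetical) directory.
# The checkpoint is untrained, so the outputs are meaningless; this only verifies that loading works.
from transformers import AutoTokenizer, ElectraForTokenClassification

model_dir = "./ELECTRA-minuscule-discriminator"  # hypothetical local clone path

tokenizer = AutoTokenizer.from_pretrained(model_dir)
model = ElectraForTokenClassification.from_pretrained(model_dir)

inputs = tokenizer("Le mineur chante toujours.", return_tensors="pt")
logits = model(**inputs).logits
print(logits.shape)  # (1, sequence_length, num_labels); the values are random noise
```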
config.json ADDED
@@ -0,0 +1,30 @@
+{
+  "_name_or_path": ".",
+  "architectures": [
+    "ElectraForTokenClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "embedding_size": 32,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 64,
+  "initializer_range": 0.02,
+  "intermediate_size": 256,
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "electra",
+  "num_attention_heads": 2,
+  "num_hidden_layers": 2,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "summary_activation": "gelu",
+  "summary_last_dropout": 0.1,
+  "summary_type": "first",
+  "summary_use_proj": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.15.0",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 256
+}
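The config above describes a deliberately tiny ELECTRA: 2 layers, 2 attention heads, hidden size 64, embedding size 32, and a 256-entry vocabulary. As a rough sanity check (a sketch, not part of the repo), the same architecture can be rebuilt from scratch to count parameters:

```python
# Sketch only: rebuild the tiny architecture from the config values above (no weights loaded)
# to see how small it is.
from transformers import ElectraConfig, ElectraForTokenClassification

config = ElectraConfig(
    embedding_size=32,
    hidden_size=64,
    intermediate_size=256,
    num_attention_heads=2,
    num_hidden_layers=2,
    max_position_embeddings=512,
    type_vocab_size=2,
    vocab_size=256,
)
model = ElectraForTokenClassification(config)
print(sum(p.numel() for p in model.parameters()))
# Roughly 1.3e5 parameters, consistent with the ~527 KB float32 checkpoint below.
```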
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4b68c4536633bf10bf12c029a65586ca64d75eaccff15fa95cb939459ea0644d
+size 527143
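The weights themselves are not stored in the git history; this file is only a Git LFS pointer, so the real 527,143-byte payload has to be fetched separately (for example with `git lfs pull`). A small sketch for verifying a fetched file against the pointer's oid:

```python
# Sketch only: check a locally fetched pytorch_model.bin against the LFS pointer above.
import hashlib

expected_oid = "4b68c4536633bf10bf12c029a65586ca64d75eaccff15fa95cb939459ea0644d"

with open("pytorch_model.bin", "rb") as f:
    digest = hashlib.sha256(f.read()).hexdigest()

print(digest == expected_oid)  # True only for the real 527,143-byte weight file
```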
special_tokens_map.json ADDED
@@ -0,0 +1 @@
+{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "sep_token": "</s>", "pad_token": "<pad>", "cls_token": "<s>", "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}}
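These are RoBERTa-style special tokens; note that `<mask>` is declared with `lstrip: true`, so it absorbs a preceding space when encoding. A short sketch (hypothetical local path) showing how they surface on the loaded tokenizer:

```python
# Sketch only: inspect the special tokens once the tokenizer is loaded from a local clone.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("./ELECTRA-minuscule-discriminator")  # hypothetical path
print(tokenizer.cls_token, tokenizer.sep_token, tokenizer.pad_token, tokenizer.mask_token)
# <s> </s> <pad> <mask>
print(tokenizer.tokenize("le <mask> chante"))  # the space before <mask> is stripped (lstrip=true)
```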
tokenizer.json ADDED
@@ -0,0 +1 @@
+{"version":"1.0","truncation":{"max_length":510,"strategy":"LongestFirst","stride":0},"padding":null,"added_tokens":[{"id":0,"special":true,"content":"<s>","single_word":false,"lstrip":false,"rstrip":false,"normalized":false},{"id":1,"special":true,"content":"<pad>","single_word":false,"lstrip":false,"rstrip":false,"normalized":false},{"id":2,"special":true,"content":"</s>","single_word":false,"lstrip":false,"rstrip":false,"normalized":false},{"id":3,"special":true,"content":"<unk>","single_word":false,"lstrip":false,"rstrip":false,"normalized":false},{"id":4,"special":true,"content":"<mask>","single_word":false,"lstrip":true,"rstrip":false,"normalized":true}],"normalizer":null,"pre_tokenizer":{"type":"ByteLevel","add_prefix_space":false,"trim_offsets":true},"post_processor":{"type":"RobertaProcessing","sep":["</s>",2],"cls":["<s>",0],"trim_offsets":true,"add_prefix_space":true},"decoder":null,"model":{"type":"BPE","dropout":null,"unk_token":"<unk>","continuing_subword_prefix":null,"end_of_word_suffix":null,"fuse_unk":false,"vocab":{"<s>":0,"<pad>":1,"</s>":2,"<unk>":3,"<mask>":4,"!":5,"(":6,")":7,"*":8,",":9,"-":10,".":11,"/":12,"0":13,"1":14,"2":15,"3":16,"4":17,"5":18,"6":19,"7":20,"8":21,":":22,";":23,"A":24,"B":25,"C":26,"D":27,"E":28,"F":29,"G":30,"H":31,"I":32,"J":33,"L":34,"M":35,"N":36,"O":37,"P":38,"Q":39,"R":40,"S":41,"T":42,"U":43,"V":44,"W":45,"Y":46,"[":47,"]":48,"_":49,"a":50,"b":51,"c":52,"d":53,"e":54,"f":55,"g":56,"h":57,"i":58,"j":59,"k":60,"l":61,"m":62,"n":63,"o":64,"p":65,"q":66,"r":67,"s":68,"t":69,"u":70,"v":71,"w":72,"x":73,"y":74,"z":75,"¦":76,"§":77,"¨":78,"©":79,"ª":80,"«":81,"®":82,"¯":83,"´":84,"¶":85,"»":86,"Â":87,"Ã":88,"â":89,"Ċ":90,"Ġ":91,"Ģ":92,"Ĩ":93,"ī":94,"ij":95,"Ķ":96,"Ļ":97,"ł":98,"de":99,"Ġt":100,"Ġl":101,"ou":102,"Ġe":103,"Ġp":104,"Ġm":105,"Ġc":106,"Ġde":107,"âĢ":108,"âĢĻ":109,"nt":110,"Ġs":111,"re":112,"qu":113,"é":114,"es":115,"on":116,"er":117,"ai":118,"Ġet":119,"Ġmo":120,"Ġqu":121,"eu":122,"Ġa":123,"Ġd":124,"Ġpa":125,"me":126,"Ġmode":127,"Ġn":128,"Ġle":129,"Ġtr":130,"le":131,"Ġla":132,"our":133,"in":134,"om":135,"Ġque":136,"ie":137,"ant":138,"Ġtu":139,"Ġou":140,"an":141,"us":142,"Ġen":143,"eur":144,"is":145,"or":146,"Ġv":147,"Ġpl":148,"Ãł":149,"un":150,"os":151,"il":152,"ais":153,"Ġé":154,"au":155,"it":156,"Ġf":157,"Ġse":158,"Ġcom":159,"Ġch":160,"ch":161,"ĠÃł":162,"Ġtou":163,"Ġles":164,"Ġpar":165,"um":166,"Ġj":167,"te":168,"ur":169,"onn":170,"Ġmin":171,"Ġdes":172,"st":173,"Ġ;":174,"ien":175,"ti":176,"Ġpas":177,"en":178,"Ġton":179,"Ġét":180,"Ġcomme":181,"Ġmineur":182,"ium":183,"Ġpour":184,"Ġplus":185,"tre":186,"Ġb":187,"ent":188,"Ġre":189,"ce":190,"Ġg":191,"as":192,"eux":193,"ĠL":194,"ans":195,"è":196,"ain":197,"air":198,"Ġne":199,"ire":200,"pp":201,"que":202,"Ġac":203,"ours":204,"Ġy":205,"Ġma":206,"Ġtrou":207,"Ġtris":208,"Ġchant":209,"ĠM":210,"Ġce":211,"ons":212,"ar":213,"ver":214,"Ġest":215,"lle":216,"Ġo":217,"hor":218,"Ġqui":219,"Ġsou":220,"ment":221,"uran":222,"dre":223,"Ġni":224,"ag":225,"tes":226,"Ġcor":227,"és":228,"ins":229,"Ġver":230,"ois":231,"ux":232,"vant":233,"ys":234,"ĠV":235,"Ġsi":236,"erch":237,"Ġmoins":238,"iel":239,"Ġtout":240,"iè":241,"jours":242,"ne":243,"Ġent":244,"Ġcl":245,"Ġcar":246,"Ġdeux":247,"Ġson":248,"Ġdans":249,"Ġcherch":250,"Ġtoujours":251,"Ġbien":252,"ĠLa":253,"Ġacti":254,"elle":255},"merges":["d e","Ġ t","Ġ l","o u","Ġ e","Ġ p","Ġ m","Ġ c","Ġ de","â Ģ","âĢ Ļ","n t","Ġ s","r e","q u","à ©","e s","o n","e r","a i","Ġe t","Ġm o","Ġ qu","e u","Ġ a","Ġ d","Ġp a","m e","Ġmo de","Ġ n","Ġl e","Ġt r","l e","Ġl a","ou r","i n","o m","Ġqu e","i e","a nt","Ġt u","Ġ ou","a n","u s","Ġe n","eu r","i s","o r","Ġ v","Ġp l","à ł","u n","o s","i l","ai s","Ġ é","a u","i t","Ġ f","Ġs e","Ġc om","Ġc h","c h","Ġ Ãł","Ġt ou","Ġl es","Ġpa r","u m","Ġ j","t e","u r","on n","Ġm in","Ġde s","s t","Ġ ;","ie n","t i","Ġpa s","e n","Ġt on","Ġé t","Ġcom me","Ġmin eur","i um","Ġp our","Ġpl us","t re","Ġ b","e nt","Ġ re","c e","Ġ g","a s","eu x","Ġ L","an s","à ¨","ai n","ai r","Ġn e","i re","p p","qu e","Ġa c","our s","Ġ y","Ġm a","Ġtr ou","Ġtr is","Ġch ant","Ġ M","Ġc e","on s","a r","v er","Ġe st","l le","Ġ o","h or","Ġqu i","Ġs ou","me nt","ur an","d re","Ġn i","a g","t es","Ġc or","é s","in s","Ġv er","o is","u x","v ant","y s","Ġ V","Ġs i","er ch","Ġmo ins","ie l","Ġtou t","i è","j ours","n e","Ġe nt","Ġc l","Ġc ar","Ġde ux","Ġs on","Ġd ans","Ġch erch","Ġtou jours","Ġb ien","ĠL a","Ġac ti","e lle"]}}
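tokenizer.json serializes a byte-level BPE tokenizer with a 256-entry vocabulary (mostly French subwords) and RoBERTa-style post-processing that wraps inputs in `<s> … </s>`. It can also be loaded directly with the `tokenizers` library, as in this sketch (local path assumed):

```python
# Sketch only: load the serialized tokenizer above directly with the `tokenizers` library.
from tokenizers import Tokenizer

tok = Tokenizer.from_file("./ELECTRA-minuscule-discriminator/tokenizer.json")  # hypothetical path
enc = tok.encode("Le mineur chante toujours.")
print(enc.tokens)  # byte-level BPE pieces, wrapped in <s> ... </s> by the RobertaProcessing step
print(enc.ids)     # ids in [0, 255], matching the 256-entry vocab
```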
tokenizer_config.json ADDED
@@ -0,0 +1 @@
+{"unk_token": "<unk>", "bos_token": "<s>", "eos_token": "</s>", "add_prefix_space": false, "errors": "replace", "sep_token": "</s>", "cls_token": "<s>", "pad_token": "<pad>", "mask_token": "<mask>", "max_len": 512, "special_tokens_map_file": "local/tokenizer/roberta-tiny/special_tokens_map.json", "name_or_path": "local/tokenizer/roberta-tiny", "tokenizer_class": "RobertaTokenizer"}
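Note that `tokenizer_class` is `RobertaTokenizer`, so `AutoTokenizer` resolves this ELECTRA checkpoint to a RoBERTa-style byte-level BPE tokenizer rather than ELECTRA's usual WordPiece one. A final sketch (hypothetical local path) showing what gets resolved:

```python
# Sketch only: AutoTokenizer picks the class from "tokenizer_class" in tokenizer_config.json.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("./ELECTRA-minuscule-discriminator")  # hypothetical path
print(type(tokenizer).__name__)    # RobertaTokenizerFast, built from tokenizer.json
print(tokenizer.model_max_length)  # expected 512, taken from "max_len" above
```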