lgrobol commited on
Commit
a6f481c
1 Parent(s): 792a25c

First model version

Browse files
README.md ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ ELECTRA-minuscule-discriminator
2
+ ===============================
3
+
4
+ A ridiculously small ELECTRA discriminator model for testing purposes.
5
+
6
+ **THIS MODEL HAS NOT BEEN TRAINED, DO NOT EXPECT ANYTHING OF IT.**
config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": ".",
3
+ "architectures": [
4
+ "ElectraForTokenClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "embedding_size": 32,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 64,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 256,
14
+ "layer_norm_eps": 1e-12,
15
+ "max_position_embeddings": 512,
16
+ "model_type": "electra",
17
+ "num_attention_heads": 2,
18
+ "num_hidden_layers": 2,
19
+ "pad_token_id": 0,
20
+ "position_embedding_type": "absolute",
21
+ "summary_activation": "gelu",
22
+ "summary_last_dropout": 0.1,
23
+ "summary_type": "first",
24
+ "summary_use_proj": true,
25
+ "torch_dtype": "float32",
26
+ "transformers_version": "4.15.0",
27
+ "type_vocab_size": 2,
28
+ "use_cache": true,
29
+ "vocab_size": 256
30
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b68c4536633bf10bf12c029a65586ca64d75eaccff15fa95cb939459ea0644d
3
+ size 527143
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "sep_token": "</s>", "pad_token": "<pad>", "cls_token": "<s>", "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}}
tokenizer.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"version":"1.0","truncation":{"max_length":510,"strategy":"LongestFirst","stride":0},"padding":null,"added_tokens":[{"id":0,"special":true,"content":"<s>","single_word":false,"lstrip":false,"rstrip":false,"normalized":false},{"id":1,"special":true,"content":"<pad>","single_word":false,"lstrip":false,"rstrip":false,"normalized":false},{"id":2,"special":true,"content":"</s>","single_word":false,"lstrip":false,"rstrip":false,"normalized":false},{"id":3,"special":true,"content":"<unk>","single_word":false,"lstrip":false,"rstrip":false,"normalized":false},{"id":4,"special":true,"content":"<mask>","single_word":false,"lstrip":true,"rstrip":false,"normalized":true}],"normalizer":null,"pre_tokenizer":{"type":"ByteLevel","add_prefix_space":false,"trim_offsets":true},"post_processor":{"type":"RobertaProcessing","sep":["</s>",2],"cls":["<s>",0],"trim_offsets":true,"add_prefix_space":true},"decoder":null,"model":{"type":"BPE","dropout":null,"unk_token":"<unk>","continuing_subword_prefix":null,"end_of_word_suffix":null,"fuse_unk":false,"vocab":{"<s>":0,"<pad>":1,"</s>":2,"<unk>":3,"<mask>":4,"!":5,"(":6,")":7,"*":8,",":9,"-":10,".":11,"/":12,"0":13,"1":14,"2":15,"3":16,"4":17,"5":18,"6":19,"7":20,"8":21,":":22,";":23,"A":24,"B":25,"C":26,"D":27,"E":28,"F":29,"G":30,"H":31,"I":32,"J":33,"L":34,"M":35,"N":36,"O":37,"P":38,"Q":39,"R":40,"S":41,"T":42,"U":43,"V":44,"W":45,"Y":46,"[":47,"]":48,"_":49,"a":50,"b":51,"c":52,"d":53,"e":54,"f":55,"g":56,"h":57,"i":58,"j":59,"k":60,"l":61,"m":62,"n":63,"o":64,"p":65,"q":66,"r":67,"s":68,"t":69,"u":70,"v":71,"w":72,"x":73,"y":74,"z":75,"¦":76,"§":77,"¨":78,"©":79,"ª":80,"«":81,"®":82,"¯":83,"´":84,"¶":85,"»":86,"Â":87,"Ã":88,"â":89,"Ċ":90,"Ġ":91,"Ģ":92,"Ĩ":93,"ī":94,"ij":95,"Ķ":96,"Ļ":97,"ł":98,"de":99,"Ġt":100,"Ġl":101,"ou":102,"Ġe":103,"Ġp":104,"Ġm":105,"Ġc":106,"Ġde":107,"âĢ":108,"âĢĻ":109,"nt":110,"Ġs":111,"re":112,"qu":113,"é":114,"es":115,"on":116,"er":117,"ai":118,"Ġet":119,"Ġmo":120,"Ġqu":121,"eu":122,"Ġa":123,"Ġd":124,"Ġpa":125,
"me":126,"Ġmode":127,"Ġn":128,"Ġle":129,"Ġtr":130,"le":131,"Ġla":132,"our":133,"in":134,"om":135,"Ġque":136,"ie":137,"ant":138,"Ġtu":139,"Ġou":140,"an":141,"us":142,"Ġen":143,"eur":144,"is":145,"or":146,"Ġv":147,"Ġpl":148,"Ãł":149,"un":150,"os":151,"il":152,"ais":153,"Ġé":154,"au":155,"it":156,"Ġf":157,"Ġse":158,"Ġcom":159,"Ġch":160,"ch":161,"ĠÃł":162,"Ġtou":163,"Ġles":164,"Ġpar":165,"um":166,"Ġj":167,"te":168,"ur":169,"onn":170,"Ġmin":171,"Ġdes":172,"st":173,"Ġ;":174,"ien":175,"ti":176,"Ġpas":177,"en":178,"Ġton":179,"Ġét":180,"Ġcomme":181,"Ġmineur":182,"ium":183,"Ġpour":184,"Ġplus":185,"tre":186,"Ġb":187,"ent":188,"Ġre":189,"ce":190,"Ġg":191,"as":192,"eux":193,"ĠL":194,"ans":195,"è":196,"ain":197,"air":198,"Ġne":199,"ire":200,"pp":201,"que":202,"Ġac":203,"ours":204,"Ġy":205,"Ġma":206,"Ġtrou":207,"Ġtris":208,"Ġchant":209,"ĠM":210,"Ġce":211,"ons":212,"ar":213,"ver":214,"Ġest":215,"lle":216,"Ġo":217,"hor":218,"Ġqui":219,"Ġsou":220,"ment":221,"uran":222,"dre":223,"Ġni":224,"ag":225,"tes":226,"Ġcor":227,"és":228,"ins":229,"Ġver":230,"ois":231,"ux":232,"vant":233,"ys":234,"ĠV":235,"Ġsi":236,"erch":237,"Ġmoins":238,"iel":239,"Ġtout":240,"iè":241,"jours":242,"ne":243,"Ġent":244,"Ġcl":245,"Ġcar":246,"Ġdeux":247,"Ġson":248,"Ġdans":249,"Ġcherch":250,"Ġtoujours":251,"Ġbien":252,"ĠLa":253,"Ġacti":254,"elle":255},"merges":["d e","Ġ t","Ġ l","o u","Ġ e","Ġ p","Ġ m","Ġ c","Ġ de","â Ģ","âĢ Ļ","n t","Ġ s","r e","q u","à ©","e s","o n","e r","a i","Ġe t","Ġm o","Ġ qu","e u","Ġ a","Ġ d","Ġp a","m e","Ġmo de","Ġ n","Ġl e","Ġt r","l e","Ġl a","ou r","i n","o m","Ġqu e","i e","a nt","Ġt u","Ġ ou","a n","u s","Ġe n","eu r","i s","o r","Ġ v","Ġp l","à ł","u n","o s","i l","ai s","Ġ é","a u","i t","Ġ f","Ġs e","Ġc om","Ġc h","c h","Ġ Ãł","Ġt ou","Ġl es","Ġpa r","u m","Ġ j","t e","u r","on n","Ġm in","Ġde s","s t","Ġ ;","ie n","t i","Ġpa s","e n","Ġt on","Ġé t","Ġcom me","Ġmin eur","i um","Ġp our","Ġpl us","t re","Ġ b","e nt","Ġ re","c e","Ġ g","a s","eu x","Ġ L","an s","à ¨","ai n","ai 
r","Ġn e","i re","p p","qu e","Ġa c","our s","Ġ y","Ġm a","Ġtr ou","Ġtr is","Ġch ant","Ġ M","Ġc e","on s","a r","v er","Ġe st","l le","Ġ o","h or","Ġqu i","Ġs ou","me nt","ur an","d re","Ġn i","a g","t es","Ġc or","é s","in s","Ġv er","o is","u x","v ant","y s","Ġ V","Ġs i","er ch","Ġmo ins","ie l","Ġtou t","i è","j ours","n e","Ġe nt","Ġc l","Ġc ar","Ġde ux","Ġs on","Ġd ans","Ġch erch","Ġtou jours","Ġb ien","ĠL a","Ġac ti","e lle"]}}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "<unk>", "bos_token": "<s>", "eos_token": "</s>", "add_prefix_space": false, "errors": "replace", "sep_token": "</s>", "cls_token": "<s>", "pad_token": "<pad>", "mask_token": "<mask>", "max_len": 512, "special_tokens_map_file": "local/tokenizer/roberta-tiny/special_tokens_map.json", "name_or_path": "local/tokenizer/roberta-tiny", "tokenizer_class": "RobertaTokenizer"}