ro_core_news_lg / meta.json
Adriane Boyd
Update spaCy pipeline
1f12b7f
raw
history blame
19.9 kB
{
"lang":"ro",
"name":"core_news_lg",
"version":"3.4.0",
"description":"Romanian pipeline optimized for CPU. Components: tok2vec, tagger, parser, lemmatizer (trainable_lemmatizer), senter, ner, attribute_ruler.",
"author":"Explosion",
"email":"[email protected]",
"url":"https://explosion.ai",
"license":"CC BY-SA 4.0",
"spacy_version":">=3.4.0,<3.5.0",
"spacy_git_version":"dd038b536",
"vectors":{
"width":300,
"vectors":500000,
"keys":500000,
"name":"ro_vectors"
},
"labels":{
"tok2vec":[
],
"tagger":[
"ARROW",
"Af",
"Afcfp-n",
"Afcfson",
"Afcfsrn",
"Afcmpoy",
"Afcms-n",
"Afp",
"Afp-p-n",
"Afp-poy",
"Afp-srn",
"Afpf--n",
"Afpfp-n",
"Afpfp-ny",
"Afpfpoy",
"Afpfpry",
"Afpfson",
"Afpfsoy",
"Afpfsrn",
"Afpfsry",
"Afpm--n",
"Afpmp-n",
"Afpmpoy",
"Afpmpry",
"Afpms-n",
"Afpmsoy",
"Afpmsry",
"Afsfp-n",
"Afsfsrn",
"BULLET",
"COLON",
"COMMA",
"Ccssp",
"Ccsspy",
"Crssp",
"Csssp",
"Cssspy",
"DASH",
"DBLQ",
"Dd3-po---e",
"Dd3-po---o",
"Dd3fpo",
"Dd3fpr",
"Dd3fpr---e",
"Dd3fpr---o",
"Dd3fpr--y",
"Dd3fso",
"Dd3fso---e",
"Dd3fsr",
"Dd3fsr---e",
"Dd3fsr---o",
"Dd3fsr--yo",
"Dd3mpo",
"Dd3mpr",
"Dd3mpr---e",
"Dd3mpr---o",
"Dd3mso---e",
"Dd3msr",
"Dd3msr---e",
"Dd3msr---o",
"Dh1ms",
"Dh3fp",
"Dh3fso",
"Dh3fsr",
"Dh3mp",
"Dh3ms",
"Di3",
"Di3-----y",
"Di3--r---e",
"Di3-po",
"Di3-po---e",
"Di3-sr",
"Di3-sr---e",
"Di3-sr--y",
"Di3fp",
"Di3fpr",
"Di3fpr---e",
"Di3fso",
"Di3fso---e",
"Di3fsr",
"Di3fsr---e",
"Di3mp",
"Di3mpr",
"Di3mpr---e",
"Di3ms",
"Di3ms----e",
"Di3mso---e",
"Di3msr",
"Di3msr---e",
"Ds1fp-p",
"Ds1fp-s",
"Ds1fsop",
"Ds1fsos",
"Ds1fsrp",
"Ds1fsrs",
"Ds1fsrs-y",
"Ds1mp-p",
"Ds1mp-s",
"Ds1ms-p",
"Ds1ms-s",
"Ds1msrs-y",
"Ds2---s",
"Ds2fp-p",
"Ds2fp-s",
"Ds2fsrp",
"Ds2fsrs",
"Ds2mp-p",
"Ds2mp-s",
"Ds2ms-p",
"Ds2ms-s",
"Ds3---p",
"Ds3---s",
"Ds3---sy",
"Ds3fp-s",
"Ds3fsos",
"Ds3fsrs",
"Ds3mp-s",
"Ds3ms-s",
"Dw3--r---e",
"Dw3-po---e",
"Dw3fpr",
"Dw3fso---e",
"Dw3fsr",
"Dw3mpr",
"Dw3mso---e",
"Dw3msr",
"Dz3fsr---e",
"Dz3mso---e",
"Dz3msr---e",
"EQUAL",
"EXCL",
"EXCLHELLIP",
"GE",
"GT",
"HELLIP",
"I",
"LCURL",
"LPAR",
"LSQR",
"LT",
"M",
"Mc-p-d",
"Mc-p-l",
"Mc-s-b",
"Mc-s-d",
"Mc-s-l",
"Mcfp-l",
"Mcfp-ln",
"Mcfprln",
"Mcfprly",
"Mcfsoln",
"Mcfsrl",
"Mcfsrln",
"Mcfsrly",
"Mcmp-l",
"Mcms-ln",
"Mcmsrl",
"Mcmsrln",
"Mcmsrly",
"Mffprln",
"Mffsrln",
"Mlfpo",
"Mlfpr",
"Mlmpr",
"Mo---l",
"Mo---ln",
"Mo-s-r",
"Mofp-ln",
"Mofpoly",
"Mofprly",
"Mofs-l",
"Mofsoln",
"Mofsoly",
"Mofsrln",
"Mofsrly",
"Mompoly",
"Momprly",
"Moms-l",
"Moms-ln",
"Momsoly",
"Momsrly",
"Nc",
"Nc---n",
"Ncf--n",
"Ncfp-n",
"Ncfpoy",
"Ncfpry",
"Ncfs-n",
"Ncfson",
"Ncfsoy",
"Ncfsrn",
"Ncfsry",
"Ncfsryy",
"Ncfsvy",
"Ncm--n",
"Ncmp-n",
"Ncmpoy",
"Ncmpry",
"Ncms-n",
"Ncms-ny",
"Ncms-y",
"Ncmsoy",
"Ncmsrn",
"Ncmsry",
"Ncmsryy",
"Ncmsvn",
"Ncmsvy",
"Np",
"Npfson",
"Npfsoy",
"Npfsrn",
"Npfsry",
"Npmpoy",
"Npmpry",
"Npms-n",
"Npmsoy",
"Npmsry",
"PERCENT",
"PERIOD",
"PLUS",
"PLUSMINUS",
"Pd3-po",
"Pd3fpr",
"Pd3fso",
"Pd3fsr",
"Pd3mpo",
"Pd3mpr",
"Pd3mpr--y",
"Pd3mso",
"Pd3msr",
"Pi3--r",
"Pi3-po",
"Pi3-so",
"Pi3-sr",
"Pi3fpr",
"Pi3fso",
"Pi3fsr",
"Pi3mpr",
"Pi3mso",
"Pi3msr",
"Pi3msr--y",
"Pp1-pa--------w",
"Pp1-pa--y-----w",
"Pp1-pd--------s",
"Pp1-pd--------w",
"Pp1-pd--y-----w",
"Pp1-pr--------s",
"Pp1-sa--------s",
"Pp1-sa--------w",
"Pp1-sa--y-----w",
"Pp1-sd--------s",
"Pp1-sd--------w",
"Pp1-sd--y-----w",
"Pp1-sn--------s",
"Pp2-----------s",
"Pp2-pa--------w",
"Pp2-pa--y-----w",
"Pp2-pd--------w",
"Pp2-pd--y-----w",
"Pp2-pr--------s",
"Pp2-sa--------s",
"Pp2-sa--------w",
"Pp2-sa--y-----w",
"Pp2-sd--------s",
"Pp2-sd--------w",
"Pp2-sd--y-----w",
"Pp2-sn--------s",
"Pp2-so--------s",
"Pp2-sr--------s",
"Pp3-p---------s",
"Pp3-pd--------w",
"Pp3-pd--y-----w",
"Pp3-po--------s",
"Pp3-sd--------w",
"Pp3-sd--y-----w",
"Pp3-so--------s",
"Pp3fpa--------w",
"Pp3fpa--y-----w",
"Pp3fpr--------s",
"Pp3fs---------s",
"Pp3fsa--------w",
"Pp3fsa--y-----w",
"Pp3fso--------s",
"Pp3fsr--------s",
"Pp3fsr--y-----s",
"Pp3mpa--------w",
"Pp3mpa--y-----w",
"Pp3mpr--------s",
"Pp3ms---------s",
"Pp3msa--------w",
"Pp3msa--y-----w",
"Pp3mso--------s",
"Pp3msr--------s",
"Pp3msr--y-----s",
"Ps1fp-s",
"Ps1fsrp",
"Ps1fsrs",
"Ps1mp-p",
"Ps1ms-p",
"Ps2fp-s",
"Ps2fsrp",
"Ps2fsrs",
"Ps3---p",
"Ps3---s",
"Ps3fp-s",
"Ps3fsrs",
"Ps3mp-s",
"Ps3ms-s",
"Pw3--r",
"Pw3-po",
"Pw3-so",
"Pw3fpr",
"Pw3fso",
"Pw3mpr",
"Pw3mso",
"Px3--a--------s",
"Px3--a--------w",
"Px3--a--y-----w",
"Px3--d--------w",
"Px3--d--y-----w",
"Pz3-sr",
"Pz3fsr",
"QUEST",
"QUOT",
"Qf",
"Qn",
"Qs",
"Qs-y",
"Qz",
"Qz-y",
"RCURL",
"RPAR",
"RSQR",
"Rc",
"Rgp",
"Rgpy",
"Rgs",
"Rp",
"Rw",
"Rw-y",
"Rz",
"SCOLON",
"SLASH",
"STAR",
"Sp",
"Spsa",
"Spsay",
"Spsd",
"Spsg",
"Td-po",
"Tdfpr",
"Tdfso",
"Tdfsr",
"Tdmpr",
"Tdmso",
"Tdmsr",
"Tf-so",
"Tffpoy",
"Tffpry",
"Tffs-y",
"Tfmpoy",
"Tfms-y",
"Tfmsoy",
"Tfmsry",
"Ti-po",
"Tifp-y",
"Tifso",
"Tifsr",
"Timso",
"Timsr",
"Tsfp",
"Tsfs",
"Tsmp",
"Tsms",
"UNDERSC",
"Va--1",
"Va--1-----y",
"Va--1p",
"Va--1s",
"Va--1s----y",
"Va--2p",
"Va--2p----y",
"Va--2s",
"Va--2s----y",
"Va--3",
"Va--3-----y",
"Va--3p",
"Va--3p----y",
"Va--3s",
"Va--3s----y",
"Vag",
"Vag-------y",
"Vaii1",
"Vaii2s",
"Vaii3p",
"Vaii3s",
"Vail3p",
"Vail3s",
"Vaip1p",
"Vaip1s",
"Vaip2p",
"Vaip2s",
"Vaip3p",
"Vaip3p----y",
"Vaip3s",
"Vaip3s----y",
"Vais3p",
"Vais3s",
"Vam-2s",
"Vanp",
"Vap--sm",
"Vasp1p",
"Vasp1s",
"Vasp2p",
"Vasp2s",
"Vasp3",
"Vmg",
"Vmg-------y",
"Vmii1",
"Vmii1-----y",
"Vmii2p",
"Vmii2s",
"Vmii3p",
"Vmii3p----y",
"Vmii3s",
"Vmii3s----y",
"Vmil1",
"Vmil1p",
"Vmil2s",
"Vmil3p",
"Vmil3p----y",
"Vmil3s",
"Vmil3s----y",
"Vmip1p",
"Vmip1p----y",
"Vmip1s",
"Vmip1s----y",
"Vmip2p",
"Vmip2s",
"Vmip2s----y",
"Vmip3",
"Vmip3-----y",
"Vmip3p",
"Vmip3s",
"Vmip3s----y",
"Vmis1p",
"Vmis1s",
"Vmis3p",
"Vmis3p----y",
"Vmis3s",
"Vmis3s----y",
"Vmm-2p",
"Vmm-2s",
"Vmnp",
"Vmnp------y",
"Vmp--pf",
"Vmp--pm",
"Vmp--sf",
"Vmp--sm",
"Vmp--sm---y",
"Vmsp1p",
"Vmsp2p",
"Vmsp2s",
"Vmsp3",
"Vmsp3-----y",
"X",
"Y",
"Ya",
"Yn",
"Ynfsoy",
"Ynfsry",
"Ynmsoy",
"Ynmsry",
"Yp",
"Yp,Yn",
"Yp-sr",
"Yr",
"_SP"
],
"parser":[
"ROOT",
"acl",
"advcl",
"advcl:tcl",
"advmod",
"advmod:tmod",
"amod",
"appos",
"aux",
"aux:pass",
"case",
"cc",
"cc:preconj",
"ccomp",
"ccomp:pmod",
"compound",
"conj",
"cop",
"csubj",
"csubj:pass",
"dep",
"det",
"expl",
"expl:impers",
"expl:pass",
"expl:poss",
"expl:pv",
"fixed",
"flat",
"goeswith",
"iobj",
"mark",
"nmod",
"nmod:tmod",
"nsubj",
"nsubj:pass",
"nummod",
"obj",
"obl",
"obl:agent",
"obl:pmod",
"orphan",
"parataxis",
"punct",
"vocative",
"xcomp"
],
"attribute_ruler":[
],
"ner":[
"DATETIME",
"EVENT",
"FACILITY",
"GPE",
"LANGUAGE",
"LOC",
"MONEY",
"NAT_REL_POL",
"NUMERIC_VALUE",
"ORDINAL",
"ORGANIZATION",
"PERIOD",
"PERSON",
"PRODUCT",
"QUANTITY",
"WORK_OF_ART"
]
},
"pipeline":[
"tok2vec",
"tagger",
"parser",
"lemmatizer",
"attribute_ruler",
"ner"
],
"components":[
"tok2vec",
"tagger",
"parser",
"lemmatizer",
"senter",
"attribute_ruler",
"ner"
],
"disabled":[
"senter"
],
"performance":{
"token_acc":0.9990029326,
"token_p":0.9967350492,
"token_r":0.9957244934,
"token_f":0.9959492157,
"tag_acc":0.9665464567,
"sents_p":0.9718875502,
"sents_r":0.9654255319,
"sents_f":0.9686457638,
"dep_uas":0.8896207773,
"dep_las":0.8376148803,
"dep_las_per_type":{
"root":{
"p":0.8915009042,
"r":0.9284369115,
"f":0.9095940959
},
"mark":{
"p":0.9310344828,
"r":0.9169811321,
"f":0.9239543726
},
"case":{
"p":0.9620728502,
"r":0.9606299213,
"f":0.9613508443
},
"nmod:tmod":{
"p":0.5909090909,
"r":0.1092436975,
"f":0.1843971631
},
"amod":{
"p":0.9243986254,
"r":0.9165247019,
"f":0.9204448246
},
"nsubj":{
"p":0.8532695375,
"r":0.8451816746,
"f":0.8492063492
},
"nmod":{
"p":0.8296428571,
"r":0.8293466619,
"f":0.8294947331
},
"aux":{
"p":0.9755639098,
"r":0.9488117002,
"f":0.9620018536
},
"advcl":{
"p":0.6056338028,
"r":0.6466165414,
"f":0.6254545455
},
"obj":{
"p":0.8037974684,
"r":0.8799076212,
"f":0.8401323043
},
"det":{
"p":0.9567690557,
"r":0.9524348811,
"f":0.9545970488
},
"cc":{
"p":0.9368421053,
"r":0.9290187891,
"f":0.9329140461
},
"conj":{
"p":0.6128205128,
"r":0.5538818076,
"f":0.5818624467
},
"nummod":{
"p":0.9034810127,
"r":0.8866459627,
"f":0.894984326
},
"acl":{
"p":0.7900874636,
"r":0.7002583979,
"f":0.7424657534
},
"advmod":{
"p":0.8071979434,
"r":0.8284960422,
"f":0.8177083333
},
"obl":{
"p":0.6735112936,
"r":0.8324873096,
"f":0.7446083995
},
"expl:pass":{
"p":0.7959183673,
"r":0.7222222222,
"f":0.7572815534
},
"nsubj:pass":{
"p":0.7947019868,
"r":0.7317073171,
"f":0.7619047619
},
"fixed":{
"p":0.8747300216,
"r":0.8562367865,
"f":0.8653846154
},
"appos":{
"p":0.4978540773,
"r":0.4427480916,
"f":0.4686868687
},
"parataxis":{
"p":0.1290322581,
"r":0.1142857143,
"f":0.1212121212
},
"aux:pass":{
"p":0.9225806452,
"r":0.9533333333,
"f":0.937704918
},
"nmod:agent":{
"p":0.0,
"r":0.0,
"f":0.0
},
"ccomp":{
"p":0.8661417323,
"r":0.8527131783,
"f":0.859375
},
"nmod:pmod":{
"p":0.0,
"r":0.0,
"f":0.0
},
"iobj":{
"p":0.7777777778,
"r":0.7777777778,
"f":0.7777777778
},
"flat":{
"p":0.7806122449,
"r":0.8052631579,
"f":0.792746114
},
"cop":{
"p":0.8048780488,
"r":0.7983870968,
"f":0.8016194332
},
"csubj":{
"p":0.7,
"r":0.6666666667,
"f":0.6829268293
},
"obl:agent":{
"p":0.0,
"r":0.0,
"f":0.0
},
"obl:pmod":{
"p":0.0,
"r":0.0,
"f":0.0
},
"expl:pv":{
"p":0.7631578947,
"r":0.8405797101,
"f":0.8
},
"compound":{
"p":0.5,
"r":0.5714285714,
"f":0.5333333333
},
"expl":{
"p":0.6666666667,
"r":0.8148148148,
"f":0.7333333333
},
"ccomp:pmod":{
"p":0.0,
"r":0.0,
"f":0.0
},
"expl:poss":{
"p":1.0,
"r":0.9032258065,
"f":0.9491525424
},
"goeswith":{
"p":0.0,
"r":0.0,
"f":0.0
},
"xcomp":{
"p":0.5,
"r":0.5185185185,
"f":0.5090909091
},
"dep":{
"p":0.0,
"r":0.0,
"f":0.0
},
"orphan":{
"p":0.0,
"r":0.0,
"f":0.0
},
"expl:impers":{
"p":0.0,
"r":0.0,
"f":0.0
},
"list":{
"p":0.0,
"r":0.0,
"f":0.0
},
"csubj:pass":{
"p":0.0,
"r":0.0,
"f":0.0
},
"cc:preconj":{
"p":0.0,
"r":0.0,
"f":0.0
}
},
"lemma_acc":0.9583369983,
"pos_acc":0.9392520821,
"morph_acc":0.9511244026,
"morph_micro_p":0.9899534372,
"morph_micro_r":0.9580529239,
"morph_micro_f":0.9707703002,
"morph_per_feat":{
"Case":{
"p":0.9924764091,
"r":0.9898257662,
"f":0.9911493155
},
"Gender":{
"p":0.9937011842,
"r":0.9861232654,
"f":0.9898977223
},
"Number":{
"p":0.9901605971,
"r":0.9222585063,
"f":0.9550040905
},
"Person":{
"p":0.988221437,
"r":0.9888037714,
"f":0.9885125184
},
"PronType":{
"p":0.9958391123,
"r":0.992398065,
"f":0.9941156109
},
"Polarity":{
"p":0.9934747145,
"r":0.9983606557,
"f":0.9959116926
},
"AdpType":{
"p":0.998982706,
"r":0.9969543147,
"f":0.9979674797
},
"Definite":{
"p":0.9899151593,
"r":0.9815873016,
"f":0.9857336415
},
"Degree":{
"p":0.9585597826,
"r":0.9476158496,
"f":0.9530563999
},
"VerbForm":{
"p":0.979305741,
"r":0.9753989362,
"f":0.9773484344
},
"Abbr":{
"p":0.9603960396,
"r":0.8660714286,
"f":0.9107981221
},
"Poss":{
"p":1.0,
"r":0.9927710843,
"f":0.9963724305
},
"NumForm":{
"p":0.9871794872,
"r":0.3181818182,
"f":0.48125
},
"NumType":{
"p":0.9872881356,
"r":0.3200549451,
"f":0.4834024896
},
"Reflex":{
"p":1.0,
"r":0.9935897436,
"f":0.9967845659
},
"Strength":{
"p":0.9919678715,
"r":0.9801587302,
"f":0.9860279441
},
"Mood":{
"p":0.9692028986,
"r":0.9816513761,
"f":0.9753874202
},
"Tense":{
"p":0.9725433526,
"r":0.9781976744,
"f":0.9753623188
},
"Variant":{
"p":0.9932432432,
"r":0.9483870968,
"f":0.9702970297
},
"Position":{
"p":0.9910714286,
"r":0.9910714286,
"f":0.9910714286
},
"Number[psor]":{
"p":1.0,
"r":0.9666666667,
"f":0.9830508475
},
"PartType":{
"p":1.0,
"r":0.9459459459,
"f":0.9722222222
},
"Foreign":{
"p":0.0,
"r":0.0,
"f":0.0
}
},
"ents_p":0.7507507508,
"ents_r":0.7683442182,
"ents_f":0.7594456047,
"ents_per_type":{
"DATETIME":{
"p":0.788590604,
"r":0.818815331,
"f":0.8034188034
},
"ORGANIZATION":{
"p":0.6896551724,
"r":0.7006369427,
"f":0.6951026856
},
"FACILITY":{
"p":0.528,
"r":0.5038167939,
"f":0.515625
},
"NUMERIC_VALUE":{
"p":0.9159663866,
"r":0.9237288136,
"f":0.9198312236
},
"ORDINAL":{
"p":0.813559322,
"r":0.8727272727,
"f":0.8421052632
},
"EVENT":{
"p":0.6216216216,
"r":0.6216216216,
"f":0.6216216216
},
"GPE":{
"p":0.8483516484,
"r":0.8873563218,
"f":0.8674157303
},
"PERSON":{
"p":0.6973478939,
"r":0.75,
"f":0.722716249
},
"NAT_REL_POL":{
"p":0.9507042254,
"r":0.9,
"f":0.9246575342
},
"MONEY":{
"p":0.9791666667,
"r":0.8103448276,
"f":0.8867924528
},
"PRODUCT":{
"p":0.5147058824,
"r":0.5109489051,
"f":0.5128205128
},
"LOC":{
"p":0.5443037975,
"r":0.5657894737,
"f":0.5548387097
},
"WORK_OF_ART":{
"p":0.4166666667,
"r":0.2631578947,
"f":0.3225806452
},
"QUANTITY":{
"p":0.7058823529,
"r":0.9230769231,
"f":0.8
},
"PERIOD":{
"p":0.8285714286,
"r":0.6904761905,
"f":0.7532467532
},
"LANGUAGE":{
"p":0.6666666667,
"r":1.0,
"f":0.8
}
},
"speed":9772.4664568833
},
"sources":[
{
"name":"UD Romanian RRT v2.8",
"url":"https://github.com/UniversalDependencies/UD_Romanian-RRT",
"license":"CC BY-SA 4.0",
"author":"Barbu Mititelu, Verginica; Irimia, Elena; Perez, Cenel-Augusto; Ion, Radu; Simionescu, Radu; Popel, Martin"
},
{
"name":"RONEC - the Romanian Named Entity Corpus (ca9ce460)",
"url":"https://github.com/dumitrescustefan/ronec",
"license":"MIT",
"author":"Dumitrescu, Stefan Daniel; Avram, Andrei-Marius; Morogan, Luciana; Toma; Stefan"
},
{
"name":"Explosion fastText Vectors (cbow, OSCAR Common Crawl + Wikipedia)",
"url":"https://spacy.io",
"license":"CC0",
"author":"Explosion"
}
],
"requirements":[
]
}