File size: 528 Bytes
cf6f740
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
#! /bin/sh
# Build a merged CoNLL-U file from the tibetan-nlp corpora and run
# esupar.train on it.
#
# Steps:
#   1. Shallow-clone each corpus repository unless its directory already
#      exists in the current working directory.
#   2. Concatenate every *-tibetan-corpus/conllu/*.conllu file, skipping
#      those whose name ends in -translated.conllu.
#   3. Drop every sentence (blank-line-delimited record) that contains a
#      tab-delimited NOTAG field and write the rest to ./all.conllu.
#   4. Run esupar.train on all.conllu.
#
# Requires: git, awk, python3 with the esupar module installed.
# Exit status: non-zero as soon as a clone, the filter, or training fails.

# Abort on the first failing command or unset variable so that we never
# train on a partially built corpus.
set -eu

for D in classical-tibetan-corpus old-tibetan-corpus modern-tibetan-corpus
do
  test -d "$D" || git clone --depth=1 "https://github.com/tibetan-nlp/$D"
done

for F in *-tibetan-corpus/conllu/*.conllu
do
  case $F in
    *-translated.conllu) continue ;;
  esac
  # An unmatched glob is left as the literal pattern; skip it instead of
  # letting cat fail on a non-existent path.
  test -f "$F" || continue
  cat -- "$F"
  # Force a sentence boundary after every file, even when the file lacks
  # a trailing newline or blank line. Surplus blank lines are ignored by
  # the awk filter below.
  printf '\n\n'
done | awk '
# Print the buffered sentence followed by one blank line, unless it is
# empty or contains a NOTAG field; then reset the buffer.
function emit() {
  if (u != "" && u !~ /\tNOTAG\t/)
    print u
  u = ""
}
$0 == "" { emit(); next }   # blank line: end of sentence
{ u = u $0 "\n" }           # accumulate the lines of the current sentence
END { emit() }              # do not lose a sentence with no trailing blank line
' > all.conllu

# Refuse to start training with nothing to train on.
if ! test -s all.conllu
then
  echo "$0: all.conllu is empty; no usable .conllu input found" >&2
  exit 1
fi

# Arguments are passed through unchanged from the original script; see the
# esupar.train documentation for the meaning of "24" and "/tmp"
# (NOTE(review): presumably batch size and a working directory - confirm).
python3 -m esupar.train KoichiYasuoka/roberta-base-tibetan KoichiYasuoka/roberta-base-tibetan-upos 24 /tmp all.conllu