KoichiYasuoka
committed on
Commit
•
860ac6f
1
Parent(s):
aff0386
algorithm improved
Browse files
ud.py
CHANGED
@@ -75,6 +75,18 @@ class UniversalDependenciesCausalPipeline(BellmanFordTokenClassificationPipeline
|
|
75 |
else:
|
76 |
t["entity_group"]=p
|
77 |
d=[model_outputs["sentence"][t["start"]:t["end"]] for t in w]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
78 |
v=self.tokenizer(d,add_special_tokens=False)
|
79 |
e=self.model.get_input_embeddings().weight
|
80 |
m=[]
|
|
|
75 |
else:
|
76 |
t["entity_group"]=p
|
77 |
d=[model_outputs["sentence"][t["start"]:t["end"]] for t in w]
|
78 |
+
for i in range(len(d)-1,-1,-1):
|
79 |
+
if d[i].startswith(" "):
|
80 |
+
j=len(d[i])-len(d[i].lstrip())
|
81 |
+
d[i]=d[i].lstrip()
|
82 |
+
w[i]["start"]+=j
|
83 |
+
if d[i].endswith(" "):
|
84 |
+
j=len(d[i])-len(d[i].rstrip())
|
85 |
+
d[i]=d[i].rstrip()
|
86 |
+
w[i]["end"]-=j
|
87 |
+
if d[i].strip()=="":
|
88 |
+
d.pop(i)
|
89 |
+
w.pop(i)
|
90 |
v=self.tokenizer(d,add_special_tokens=False)
|
91 |
e=self.model.get_input_embeddings().weight
|
92 |
m=[]
|